diff options
Diffstat (limited to 'tools')
111 files changed, 2344 insertions, 337 deletions
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index a4217c1a5d01..2769360f195c 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -266,8 +266,10 @@ struct kvm_vcpu_events { #define KVM_DEV_ARM_ITS_CTRL_RESET 4 /* KVM_IRQ_LINE irq field index values */ +#define KVM_ARM_IRQ_VCPU2_SHIFT 28 +#define KVM_ARM_IRQ_VCPU2_MASK 0xf #define KVM_ARM_IRQ_TYPE_SHIFT 24 -#define KVM_ARM_IRQ_TYPE_MASK 0xff +#define KVM_ARM_IRQ_TYPE_MASK 0xf #define KVM_ARM_IRQ_VCPU_SHIFT 16 #define KVM_ARM_IRQ_VCPU_MASK 0xff #define KVM_ARM_IRQ_NUM_SHIFT 0 diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 9a507716ae2f..67c21f9bdbad 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -325,8 +325,10 @@ struct kvm_vcpu_events { #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 /* KVM_IRQ_LINE irq field index values */ +#define KVM_ARM_IRQ_VCPU2_SHIFT 28 +#define KVM_ARM_IRQ_VCPU2_MASK 0xf #define KVM_ARM_IRQ_TYPE_SHIFT 24 -#define KVM_ARM_IRQ_TYPE_MASK 0xff +#define KVM_ARM_IRQ_TYPE_MASK 0xf #define KVM_ARM_IRQ_VCPU_SHIFT 16 #define KVM_ARM_IRQ_VCPU_MASK 0xff #define KVM_ARM_IRQ_NUM_SHIFT 0 diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 47104e5b47fd..436ec7636927 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -231,6 +231,12 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_GSCB (1UL << 9) #define KVM_SYNC_BPBC (1UL << 10) #define KVM_SYNC_ETOKEN (1UL << 11) + +#define KVM_SYNC_S390_VALID_FIELDS \ + (KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \ + KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \ + KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN) + /* length and alignment of the sdnx as a power of two */ #define SDNXC 8 #define SDNXL (1UL << SDNXC) diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index a9731f8a480f..2e8a30f06c74 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -75,6 +75,7 @@ #define SVM_EXIT_MWAIT 0x08b #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d +#define SVM_EXIT_RDPRU 0x08e #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index f0b0c90dd398..3eb8411ab60e 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -31,6 +31,7 @@ #define EXIT_REASON_EXCEPTION_NMI 0 #define EXIT_REASON_EXTERNAL_INTERRUPT 1 #define EXIT_REASON_TRIPLE_FAULT 2 +#define EXIT_REASON_INIT_SIGNAL 3 #define EXIT_REASON_PENDING_INTERRUPT 7 #define EXIT_REASON_NMI_WINDOW 8 @@ -85,11 +86,14 @@ #define EXIT_REASON_PML_FULL 62 #define EXIT_REASON_XSAVES 63 #define EXIT_REASON_XRSTORS 64 +#define EXIT_REASON_UMWAIT 67 +#define EXIT_REASON_TPAUSE 68 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ + { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \ { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ @@ -142,7 +146,9 @@ { EXIT_REASON_RDSEED, "RDSEED" }, \ { EXIT_REASON_PML_FULL, "PML_FULL" }, \ { EXIT_REASON_XSAVES, "XSAVES" }, \ - { EXIT_REASON_XRSTORS, "XRSTORS" } + { EXIT_REASON_XRSTORS, "XRSTORS" }, \ + { EXIT_REASON_UMWAIT, "UMWAIT" }, \ + { EXIT_REASON_TPAUSE, "TPAUSE" } #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index fbf5e4a0cb9c..5d1995fd369c 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -12,7 +12,11 @@ INSTALL ?= install CFLAGS += -Wall -O2 CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/include/uapi -I$(srctree)/include -ifeq ($(srctree),) +# This will work when bpf is built in tools env. where srctree +# isn't set and when invoked from selftests build, where srctree +# is set to ".". building_out_of_srctree is undefined for in srctree +# builds +ifndef building_out_of_srctree srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) endif diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 63b1f506ea67..c160a5354eb6 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -67,6 +67,9 @@ #define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ #define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ +#define MADV_COLD 20 /* deactivate these pages */ +#define MADV_PAGEOUT 21 /* reclaim these pages */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 328d05e77d9f..469dc512cca3 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -521,6 +521,7 @@ typedef struct drm_i915_irq_wait { #define I915_SCHEDULER_CAP_PRIORITY (1ul << 1) #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) #define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) +#define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4) #define I915_PARAM_HUC_STATUS 42 diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index 2a616aa3f686..379a612f8f1d 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -13,6 +13,9 @@ #include <linux/limits.h> #include <linux/ioctl.h> #include <linux/types.h> +#ifndef __KERNEL__ +#include <linux/fscrypt.h> +#endif /* Use of MS_* flags within the kernel is restricted to core mount(2) code. */ #if !defined(__KERNEL__) @@ -213,57 +216,6 @@ struct fsxattr { #define FS_IOC_SETFSLABEL _IOW(0x94, 50, char[FSLABEL_MAX]) /* - * File system encryption support - */ -/* Policy provided via an ioctl on the topmost directory */ -#define FS_KEY_DESCRIPTOR_SIZE 8 - -#define FS_POLICY_FLAGS_PAD_4 0x00 -#define FS_POLICY_FLAGS_PAD_8 0x01 -#define FS_POLICY_FLAGS_PAD_16 0x02 -#define FS_POLICY_FLAGS_PAD_32 0x03 -#define FS_POLICY_FLAGS_PAD_MASK 0x03 -#define FS_POLICY_FLAG_DIRECT_KEY 0x04 /* use master key directly */ -#define FS_POLICY_FLAGS_VALID 0x07 - -/* Encryption algorithms */ -#define FS_ENCRYPTION_MODE_INVALID 0 -#define FS_ENCRYPTION_MODE_AES_256_XTS 1 -#define FS_ENCRYPTION_MODE_AES_256_GCM 2 -#define FS_ENCRYPTION_MODE_AES_256_CBC 3 -#define FS_ENCRYPTION_MODE_AES_256_CTS 4 -#define FS_ENCRYPTION_MODE_AES_128_CBC 5 -#define FS_ENCRYPTION_MODE_AES_128_CTS 6 -#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* Removed, do not use. */ -#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* Removed, do not use. */ -#define FS_ENCRYPTION_MODE_ADIANTUM 9 - -struct fscrypt_policy { - __u8 version; - __u8 contents_encryption_mode; - __u8 filenames_encryption_mode; - __u8 flags; - __u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; -}; - -#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy) -#define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) -#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy) - -/* Parameters for passing an encryption key into the kernel keyring */ -#define FS_KEY_DESC_PREFIX "fscrypt:" -#define FS_KEY_DESC_PREFIX_SIZE 8 - -/* Structure that userspace passes to the kernel keyring */ -#define FS_MAX_KEY_SIZE 64 - -struct fscrypt_key { - __u32 mode; - __u8 raw[FS_MAX_KEY_SIZE]; - __u32 size; -}; - -/* * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) * * Note: for historical reasons, these flags were originally used and @@ -306,6 +258,7 @@ struct fscrypt_key { #define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ #define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */ #define FS_EXTENT_FL 0x00080000 /* Extents */ +#define FS_VERITY_FL 0x00100000 /* Verity protected inode */ #define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ #define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */ #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h new file mode 100644 index 000000000000..39ccfe9311c3 --- /dev/null +++ b/tools/include/uapi/linux/fscrypt.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * fscrypt user API + * + * These ioctls can be used on filesystems that support fscrypt. See the + * "User API" section of Documentation/filesystems/fscrypt.rst. + */ +#ifndef _UAPI_LINUX_FSCRYPT_H +#define _UAPI_LINUX_FSCRYPT_H + +#include <linux/types.h> + +/* Encryption policy flags */ +#define FSCRYPT_POLICY_FLAGS_PAD_4 0x00 +#define FSCRYPT_POLICY_FLAGS_PAD_8 0x01 +#define FSCRYPT_POLICY_FLAGS_PAD_16 0x02 +#define FSCRYPT_POLICY_FLAGS_PAD_32 0x03 +#define FSCRYPT_POLICY_FLAGS_PAD_MASK 0x03 +#define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04 +#define FSCRYPT_POLICY_FLAGS_VALID 0x07 + +/* Encryption algorithms */ +#define FSCRYPT_MODE_AES_256_XTS 1 +#define FSCRYPT_MODE_AES_256_CTS 4 +#define FSCRYPT_MODE_AES_128_CBC 5 +#define FSCRYPT_MODE_AES_128_CTS 6 +#define FSCRYPT_MODE_ADIANTUM 9 +#define __FSCRYPT_MODE_MAX 9 + +/* + * Legacy policy version; ad-hoc KDF and no key verification. + * For new encrypted directories, use fscrypt_policy_v2 instead. + * + * Careful: the .version field for this is actually 0, not 1. + */ +#define FSCRYPT_POLICY_V1 0 +#define FSCRYPT_KEY_DESCRIPTOR_SIZE 8 +struct fscrypt_policy_v1 { + __u8 version; + __u8 contents_encryption_mode; + __u8 filenames_encryption_mode; + __u8 flags; + __u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE]; +}; +#define fscrypt_policy fscrypt_policy_v1 + +/* + * Process-subscribed "logon" key description prefix and payload format. + * Deprecated; prefer FS_IOC_ADD_ENCRYPTION_KEY instead. + */ +#define FSCRYPT_KEY_DESC_PREFIX "fscrypt:" +#define FSCRYPT_KEY_DESC_PREFIX_SIZE 8 +#define FSCRYPT_MAX_KEY_SIZE 64 +struct fscrypt_key { + __u32 mode; + __u8 raw[FSCRYPT_MAX_KEY_SIZE]; + __u32 size; +}; + +/* + * New policy version with HKDF and key verification (recommended). + */ +#define FSCRYPT_POLICY_V2 2 +#define FSCRYPT_KEY_IDENTIFIER_SIZE 16 +struct fscrypt_policy_v2 { + __u8 version; + __u8 contents_encryption_mode; + __u8 filenames_encryption_mode; + __u8 flags; + __u8 __reserved[4]; + __u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]; +}; + +/* Struct passed to FS_IOC_GET_ENCRYPTION_POLICY_EX */ +struct fscrypt_get_policy_ex_arg { + __u64 policy_size; /* input/output */ + union { + __u8 version; + struct fscrypt_policy_v1 v1; + struct fscrypt_policy_v2 v2; + } policy; /* output */ +}; + +/* + * v1 policy keys are specified by an arbitrary 8-byte key "descriptor", + * matching fscrypt_policy_v1::master_key_descriptor. + */ +#define FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR 1 + +/* + * v2 policy keys are specified by a 16-byte key "identifier" which the kernel + * calculates as a cryptographic hash of the key itself, + * matching fscrypt_policy_v2::master_key_identifier. + */ +#define FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER 2 + +/* + * Specifies a key, either for v1 or v2 policies. This doesn't contain the + * actual key itself; this is just the "name" of the key. + */ +struct fscrypt_key_specifier { + __u32 type; /* one of FSCRYPT_KEY_SPEC_TYPE_* */ + __u32 __reserved; + union { + __u8 __reserved[32]; /* reserve some extra space */ + __u8 descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE]; + __u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]; + } u; +}; + +/* Struct passed to FS_IOC_ADD_ENCRYPTION_KEY */ +struct fscrypt_add_key_arg { + struct fscrypt_key_specifier key_spec; + __u32 raw_size; + __u32 __reserved[9]; + __u8 raw[]; +}; + +/* Struct passed to FS_IOC_REMOVE_ENCRYPTION_KEY */ +struct fscrypt_remove_key_arg { + struct fscrypt_key_specifier key_spec; +#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY 0x00000001 +#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS 0x00000002 + __u32 removal_status_flags; /* output */ + __u32 __reserved[5]; +}; + +/* Struct passed to FS_IOC_GET_ENCRYPTION_KEY_STATUS */ +struct fscrypt_get_key_status_arg { + /* input */ + struct fscrypt_key_specifier key_spec; + __u32 __reserved[6]; + + /* output */ +#define FSCRYPT_KEY_STATUS_ABSENT 1 +#define FSCRYPT_KEY_STATUS_PRESENT 2 +#define FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED 3 + __u32 status; +#define FSCRYPT_KEY_STATUS_FLAG_ADDED_BY_SELF 0x00000001 + __u32 status_flags; + __u32 user_count; + __u32 __out_reserved[13]; +}; + +#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy) +#define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) +#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy) +#define FS_IOC_GET_ENCRYPTION_POLICY_EX _IOWR('f', 22, __u8[9]) /* size + version */ +#define FS_IOC_ADD_ENCRYPTION_KEY _IOWR('f', 23, struct fscrypt_add_key_arg) +#define FS_IOC_REMOVE_ENCRYPTION_KEY _IOWR('f', 24, struct fscrypt_remove_key_arg) +#define FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS _IOWR('f', 25, struct fscrypt_remove_key_arg) +#define FS_IOC_GET_ENCRYPTION_KEY_STATUS _IOWR('f', 26, struct fscrypt_get_key_status_arg) + +/**********************************************************************/ + +/* old names; don't add anything new here! */ +#ifndef __KERNEL__ +#define FS_KEY_DESCRIPTOR_SIZE FSCRYPT_KEY_DESCRIPTOR_SIZE +#define FS_POLICY_FLAGS_PAD_4 FSCRYPT_POLICY_FLAGS_PAD_4 +#define FS_POLICY_FLAGS_PAD_8 FSCRYPT_POLICY_FLAGS_PAD_8 +#define FS_POLICY_FLAGS_PAD_16 FSCRYPT_POLICY_FLAGS_PAD_16 +#define FS_POLICY_FLAGS_PAD_32 FSCRYPT_POLICY_FLAGS_PAD_32 +#define FS_POLICY_FLAGS_PAD_MASK FSCRYPT_POLICY_FLAGS_PAD_MASK +#define FS_POLICY_FLAG_DIRECT_KEY FSCRYPT_POLICY_FLAG_DIRECT_KEY +#define FS_POLICY_FLAGS_VALID FSCRYPT_POLICY_FLAGS_VALID +#define FS_ENCRYPTION_MODE_INVALID 0 /* never used */ +#define FS_ENCRYPTION_MODE_AES_256_XTS FSCRYPT_MODE_AES_256_XTS +#define FS_ENCRYPTION_MODE_AES_256_GCM 2 /* never used */ +#define FS_ENCRYPTION_MODE_AES_256_CBC 3 /* never used */ +#define FS_ENCRYPTION_MODE_AES_256_CTS FSCRYPT_MODE_AES_256_CTS +#define FS_ENCRYPTION_MODE_AES_128_CBC FSCRYPT_MODE_AES_128_CBC +#define FS_ENCRYPTION_MODE_AES_128_CTS FSCRYPT_MODE_AES_128_CTS +#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* removed */ +#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* removed */ +#define FS_ENCRYPTION_MODE_ADIANTUM FSCRYPT_MODE_ADIANTUM +#define FS_KEY_DESC_PREFIX FSCRYPT_KEY_DESC_PREFIX +#define FS_KEY_DESC_PREFIX_SIZE FSCRYPT_KEY_DESC_PREFIX_SIZE +#define FS_MAX_KEY_SIZE FSCRYPT_MAX_KEY_SIZE +#endif /* !__KERNEL__ */ + +#endif /* _UAPI_LINUX_FSCRYPT_H */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 5e3f12d5359e..52641d8ca9e8 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -243,6 +243,8 @@ struct kvm_hyperv_exit { #define KVM_INTERNAL_ERROR_SIMUL_EX 2 /* Encounter unexpected vm-exit due to delivery event. */ #define KVM_INTERNAL_ERROR_DELIVERY_EV 3 +/* Encounter unexpected vm-exit reason */ +#define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON 4 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { @@ -996,6 +998,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_PTRAUTH_ADDRESS 171 #define KVM_CAP_ARM_PTRAUTH_GENERIC 172 #define KVM_CAP_PMU_EVENT_FILTER 173 +#define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174 +#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175 #ifdef KVM_CAP_IRQ_ROUTING @@ -1142,6 +1146,7 @@ struct kvm_dirty_tlb { #define KVM_REG_S390 0x5000000000000000ULL #define KVM_REG_ARM64 0x6000000000000000ULL #define KVM_REG_MIPS 0x7000000000000000ULL +#define KVM_REG_RISCV 0x8000000000000000ULL #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h index b3105ac1381a..99335e1f4a27 100644 --- a/tools/include/uapi/linux/sched.h +++ b/tools/include/uapi/linux/sched.h @@ -33,8 +33,31 @@ #define CLONE_NEWNET 0x40000000 /* New network namespace */ #define CLONE_IO 0x80000000 /* Clone io context */ -/* - * Arguments for the clone3 syscall +#ifndef __ASSEMBLY__ +/** + * struct clone_args - arguments for the clone3 syscall + * @flags: Flags for the new process as listed above. + * All flags are valid except for CSIGNAL and + * CLONE_DETACHED. + * @pidfd: If CLONE_PIDFD is set, a pidfd will be + * returned in this argument. + * @child_tid: If CLONE_CHILD_SETTID is set, the TID of the + * child process will be returned in the child's + * memory. + * @parent_tid: If CLONE_PARENT_SETTID is set, the TID of + * the child process will be returned in the + * parent's memory. + * @exit_signal: The exit_signal the parent process will be + * sent when the child exits. + * @stack: Specify the location of the stack for the + * child process. + * @stack_size: The size of the stack for the child process. + * @tls: If CLONE_SETTLS is set, the tls descriptor + * is set to tls. + * + * The structure is versioned by size and thus extensible. + * New struct members must go at the end of the struct and + * must be properly 64bit aligned. */ struct clone_args { __aligned_u64 flags; @@ -46,6 +69,9 @@ struct clone_args { __aligned_u64 stack_size; __aligned_u64 tls; }; +#endif + +#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ /* * Scheduling policies diff --git a/tools/include/uapi/linux/usbdevice_fs.h b/tools/include/uapi/linux/usbdevice_fs.h index 78efe870c2b7..cf525cddeb94 100644 --- a/tools/include/uapi/linux/usbdevice_fs.h +++ b/tools/include/uapi/linux/usbdevice_fs.h @@ -158,6 +158,7 @@ struct usbdevfs_hub_portinfo { #define USBDEVFS_CAP_MMAP 0x20 #define USBDEVFS_CAP_DROP_PRIVILEGES 0x40 #define USBDEVFS_CAP_CONNINFO_EX 0x80 +#define USBDEVFS_CAP_SUSPEND 0x100 /* USBDEVFS_DISCONNECT_CLAIM flags & struct */ @@ -223,5 +224,8 @@ struct usbdevfs_streams { * extending size of the data returned. */ #define USBDEVFS_CONNINFO_EX(len) _IOC(_IOC_READ, 'U', 32, len) +#define USBDEVFS_FORBID_SUSPEND _IO('U', 33) +#define USBDEVFS_ALLOW_SUSPEND _IO('U', 34) +#define USBDEVFS_WAIT_FOR_RESUME _IO('U', 35) #endif /* _UAPI_LINUX_USBDEVICE_FS_H */ diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index c6f94cffe06e..56ce6292071b 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -8,7 +8,11 @@ LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION))) MAKEFLAGS += --no-print-directory -ifeq ($(srctree),) +# This will work when bpf is built in tools env. where srctree +# isn't set and when invoked from selftests build, where srctree +# is a ".". building_out_of_srctree is undefined for in srctree +# builds +ifndef building_out_of_srctree srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) srctree := $(patsubst %/,%,$(dir $(srctree))) @@ -110,6 +114,9 @@ override CFLAGS += $(INCLUDES) override CFLAGS += -fvisibility=hidden override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +# flags specific for shared library +SHLIB_FLAGS := -DSHARED + ifeq ($(VERBOSE),1) Q = else @@ -126,14 +133,17 @@ all: export srctree OUTPUT CC LD CFLAGS V include $(srctree)/tools/build/Makefile.include -BPF_IN := $(OUTPUT)libbpf-in.o +SHARED_OBJDIR := $(OUTPUT)sharedobjs/ +STATIC_OBJDIR := $(OUTPUT)staticobjs/ +BPF_IN_SHARED := $(SHARED_OBJDIR)libbpf-in.o +BPF_IN_STATIC := $(STATIC_OBJDIR)libbpf-in.o VERSION_SCRIPT := libbpf.map LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) PC_FILE := $(addprefix $(OUTPUT),$(PC_FILE)) -GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \ +GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}' | \ sort -u | wc -l) @@ -155,7 +165,7 @@ all: fixdep all_cmd: $(CMD_TARGETS) check -$(BPF_IN): force elfdep bpfdep +$(BPF_IN_SHARED): force elfdep bpfdep @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true @@ -171,17 +181,20 @@ $(BPF_IN): force elfdep bpfdep @(test -f ../../include/uapi/linux/if_xdp.h -a -f ../../../include/uapi/linux/if_xdp.h && ( \ (diff -B ../../include/uapi/linux/if_xdp.h ../../../include/uapi/linux/if_xdp.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true - $(Q)$(MAKE) $(build)=libbpf + $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)" + +$(BPF_IN_STATIC): force elfdep bpfdep + $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) -$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN) +$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@ @ln -sf $(@F) $(OUTPUT)libbpf.so @ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION) -$(OUTPUT)libbpf.a: $(BPF_IN) +$(OUTPUT)libbpf.a: $(BPF_IN_STATIC) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ $(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a @@ -197,7 +210,7 @@ check: check_abi check_abi: $(OUTPUT)libbpf.so @if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then \ - echo "Warning: Num of global symbols in $(BPF_IN)" \ + echo "Warning: Num of global symbols in $(BPF_IN_SHARED)" \ "($(GLOBAL_SYM_COUNT)) does NOT match with num of" \ "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \ "Please make sure all LIBBPF_API symbols are" \ @@ -255,9 +268,9 @@ config-clean: $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null clean: - $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \ + $(call QUIET_CLEAN, libbpf) $(RM) -rf $(TARGETS) $(CXX_TEST_TARGET) \ *.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \ - *.pc LIBBPF-CFLAGS + *.pc LIBBPF-CFLAGS $(SHARED_OBJDIR) $(STATIC_OBJDIR) $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 715967762312..ede55fec3618 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -48,6 +48,8 @@ struct btf_dump_type_aux_state { __u8 fwd_emitted: 1; /* whether unique non-duplicate name was already assigned */ __u8 name_resolved: 1; + /* whether type is referenced from any other type */ + __u8 referenced: 1; }; struct btf_dump { @@ -173,6 +175,7 @@ void btf_dump__free(struct btf_dump *d) free(d); } +static int btf_dump_mark_referenced(struct btf_dump *d); static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr); static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id); @@ -213,6 +216,11 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id) /* VOID is special */ d->type_states[0].order_state = ORDERED; d->type_states[0].emit_state = EMITTED; + + /* eagerly determine referenced types for anon enums */ + err = btf_dump_mark_referenced(d); + if (err) + return err; } d->emit_queue_cnt = 0; @@ -226,6 +234,79 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id) return 0; } +/* + * Mark all types that are referenced from any other type. This is used to + * determine top-level anonymous enums that need to be emitted as an + * independent type declarations. + * Anonymous enums come in two flavors: either embedded in a struct's field + * definition, in which case they have to be declared inline as part of field + * type declaration; or as a top-level anonymous enum, typically used for + * declaring global constants. It's impossible to distinguish between two + * without knowning whether given enum type was referenced from other type: + * top-level anonymous enum won't be referenced by anything, while embedded + * one will. + */ +static int btf_dump_mark_referenced(struct btf_dump *d) +{ + int i, j, n = btf__get_nr_types(d->btf); + const struct btf_type *t; + __u16 vlen; + + for (i = 1; i <= n; i++) { + t = btf__type_by_id(d->btf, i); + vlen = btf_vlen(t); + + switch (btf_kind(t)) { + case BTF_KIND_INT: + case BTF_KIND_ENUM: + case BTF_KIND_FWD: + break; + + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + d->type_states[t->type].referenced = 1; + break; + + case BTF_KIND_ARRAY: { + const struct btf_array *a = btf_array(t); + + d->type_states[a->index_type].referenced = 1; + d->type_states[a->type].referenced = 1; + break; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = btf_members(t); + + for (j = 0; j < vlen; j++, m++) + d->type_states[m->type].referenced = 1; + break; + } + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *p = btf_params(t); + + for (j = 0; j < vlen; j++, p++) + d->type_states[p->type].referenced = 1; + break; + } + case BTF_KIND_DATASEC: { + const struct btf_var_secinfo *v = btf_var_secinfos(t); + + for (j = 0; j < vlen; j++, v++) + d->type_states[v->type].referenced = 1; + break; + } + default: + return -EINVAL; + } + } + return 0; +} static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id) { __u32 *new_queue; @@ -395,7 +476,12 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) } case BTF_KIND_ENUM: case BTF_KIND_FWD: - if (t->name_off != 0) { + /* + * non-anonymous or non-referenced enums are top-level + * declarations and should be emitted. Same logic can be + * applied to FWDs, it won't hurt anyways. + */ + if (t->name_off != 0 || !tstate->referenced) { err = btf_dump_add_emit_queue_id(d, id); if (err) return err; @@ -536,11 +622,6 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) t = btf__type_by_id(d->btf, id); kind = btf_kind(t); - if (top_level_def && t->name_off == 0) { - pr_warning("unexpected nameless definition, id:[%u]\n", id); - return; - } - if (tstate->emit_state == EMITTING) { if (tstate->fwd_emitted) return; @@ -1167,6 +1248,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, return; } + next_id = decls->ids[decls->cnt - 1]; next_t = btf__type_by_id(d->btf, next_id); multidim = btf_is_array(next_t); /* we need space if we have named non-pointer */ diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 2e83a34f8c79..98216a69c32f 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -34,6 +34,22 @@ (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD)) #endif +/* Symbol versioning is different between static and shared library. + * Properly versioned symbols are needed for shared library, but + * only the symbol of the new version is needed for static library. + */ +#ifdef SHARED +# define COMPAT_VERSION(internal_name, api_name, version) \ + asm(".symver " #internal_name "," #api_name "@" #version); +# define DEFAULT_VERSION(internal_name, api_name, version) \ + asm(".symver " #internal_name "," #api_name "@@" #version); +#else +# define COMPAT_VERSION(internal_name, api_name, version) +# define DEFAULT_VERSION(internal_name, api_name, version) \ + extern typeof(internal_name) api_name \ + __attribute__((alias(#internal_name))); +#endif + extern void libbpf_print(enum libbpf_print_level level, const char *format, ...) __attribute__((format(printf, 2, 3))); diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 842c4fd55859..a902838f9fcc 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -65,7 +65,6 @@ struct xsk_socket { int xsks_map_fd; __u32 queue_id; char ifname[IFNAMSIZ]; - bool zc; }; struct xsk_nl_info { @@ -262,8 +261,8 @@ int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp, &config); } -asm(".symver xsk_umem__create_v0_0_2, xsk_umem__create@LIBBPF_0.0.2"); -asm(".symver xsk_umem__create_v0_0_4, xsk_umem__create@@LIBBPF_0.0.4"); +COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2) +DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) static int xsk_load_xdp_prog(struct xsk_socket *xsk) { @@ -491,7 +490,6 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, void *rx_map = NULL, *tx_map = NULL; struct sockaddr_xdp sxdp = {}; struct xdp_mmap_offsets off; - struct xdp_options opts; struct xsk_socket *xsk; socklen_t optlen; int err; @@ -611,15 +609,6 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, xsk->prog_fd = -1; - optlen = sizeof(opts); - err = getsockopt(xsk->fd, SOL_XDP, XDP_OPTIONS, &opts, &optlen); - if (err) { - err = -errno; - goto out_mmap_tx; - } - - xsk->zc = opts.flags & XDP_OPTIONS_ZEROCOPY; - if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { err = xsk_setup_xdp_prog(xsk); if (err) diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index ed61fb3a46c0..5b2cd5e58df0 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -20,7 +20,13 @@ MAKEFLAGS += --no-print-directory LIBFILE = $(OUTPUT)libsubcmd.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) -CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -fPIC + +ifeq ($(DEBUG),0) + ifeq ($(feature-fortify-source), 1) + CFLAGS += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 + endif +endif ifeq ($(CC_NO_CLANG), 0) CFLAGS += -O3 diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 176f2f084060..044c9a3cb247 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -138,7 +138,6 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "do_task_dead", "__module_put_and_exit", "complete_and_exit", - "kvm_spurious_fault", "__reiserfs_panic", "lbug_with_loc", "fortify_panic", diff --git a/tools/perf/Documentation/asciidoc.conf b/tools/perf/Documentation/asciidoc.conf index 356b23a40339..2b62ba1e72b7 100644 --- a/tools/perf/Documentation/asciidoc.conf +++ b/tools/perf/Documentation/asciidoc.conf @@ -71,6 +71,9 @@ ifdef::backend-docbook[] [header] template::[header-declarations] <refentry> +ifdef::perf_date[] +<refentryinfo><date>{perf_date}</date></refentryinfo> +endif::perf_date[] <refmeta> <refentrytitle>{mantitle}</refentrytitle> <manvolnum>{manvolnum}</manvolnum> diff --git a/tools/perf/Documentation/jitdump-specification.txt b/tools/perf/Documentation/jitdump-specification.txt index 4c62b0713651..52152d156ad9 100644 --- a/tools/perf/Documentation/jitdump-specification.txt +++ b/tools/perf/Documentation/jitdump-specification.txt @@ -36,8 +36,8 @@ III/ Jitdump file header format Each jitdump file starts with a fixed size header containing the following fields in order: -* uint32_t magic : a magic number tagging the file type. The value is 4-byte long and represents the string "JiTD" in ASCII form. It is 0x4A695444 or 0x4454694a depending on the endianness. The field can be used to detect the endianness of the file -* uint32_t version : a 4-byte value representing the format version. It is currently set to 2 +* uint32_t magic : a magic number tagging the file type. The value is 4-byte long and represents the string "JiTD" in ASCII form. It written is as 0x4A695444. The reader will detect an endian mismatch when it reads 0x4454694a. +* uint32_t version : a 4-byte value representing the format version. It is currently set to 1 * uint32_t total_size: size in bytes of file header * uint32_t elf_mach : ELF architecture encoding (ELF e_machine value as specified in /usr/include/elf.h) * uint32_t pad1 : padding. Reserved for future use diff --git a/tools/perf/arch/arm/annotate/instructions.c b/tools/perf/arch/arm/annotate/instructions.c index e1d4b484cc4b..2ff6cedeb9c5 100644 --- a/tools/perf/arch/arm/annotate/instructions.c +++ b/tools/perf/arch/arm/annotate/instructions.c @@ -37,7 +37,7 @@ static int arm__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arm = zalloc(sizeof(*arm)); if (!arm) - return -1; + return ENOMEM; #define ARM_CONDS "(cc|cs|eq|ge|gt|hi|le|ls|lt|mi|ne|pl|vc|vs)" err = regcomp(&arm->call_insn, "^blx?" ARM_CONDS "?$", REG_EXTENDED); @@ -59,5 +59,5 @@ out_free_call: regfree(&arm->call_insn); out_free_arm: free(arm); - return -1; + return SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP; } diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c index 43aa93ed8414..037e292ecd8e 100644 --- a/tools/perf/arch/arm64/annotate/instructions.c +++ b/tools/perf/arch/arm64/annotate/instructions.c @@ -95,7 +95,7 @@ static int arm64__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arm = zalloc(sizeof(*arm)); if (!arm) - return -1; + return ENOMEM; /* bl, blr */ err = regcomp(&arm->call_insn, "^blr?$", REG_EXTENDED); @@ -118,5 +118,5 @@ out_free_call: regfree(&arm->call_insn); out_free_arm: free(arm); - return -1; + return SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP; } diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index b6b7bc7e31a1..3b4cdfc5efd6 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <sys/types.h> +#include <errno.h> #include <unistd.h> #include <stdio.h> #include <stdlib.h> @@ -30,7 +31,7 @@ get_cpuid(char *buffer, size_t sz) buffer[nb-1] = '\0'; return 0; } - return -1; + return ENOBUFS; } char * diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c index 89bb8f2c54ce..a50e70baf918 100644 --- a/tools/perf/arch/s390/annotate/instructions.c +++ b/tools/perf/arch/s390/annotate/instructions.c @@ -164,8 +164,10 @@ static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused) if (!arch->initialized) { arch->initialized = true; arch->associate_instruction_ops = s390__associate_ins_ops; - if (cpuid) - err = s390__cpuid_parse(arch, cpuid); + if (cpuid) { + if (s390__cpuid_parse(arch, cpuid)) + err = SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING; + } } return err; diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c index 8b0b018d896a..7933f6871c81 100644 --- a/tools/perf/arch/s390/util/header.c +++ b/tools/perf/arch/s390/util/header.c @@ -8,6 +8,7 @@ */ #include <sys/types.h> +#include <errno.h> #include <unistd.h> #include <stdio.h> #include <string.h> @@ -54,7 +55,7 @@ int get_cpuid(char *buffer, size_t sz) sysinfo = fopen(SYSINFO, "r"); if (sysinfo == NULL) - return -1; + return errno; while ((read = getline(&line, &line_sz, sysinfo)) != -1) { if (!strncmp(line, SYSINFO_MANU, strlen(SYSINFO_MANU))) { @@ -89,7 +90,7 @@ int get_cpuid(char *buffer, size_t sz) /* Missing manufacturer, type or model information should not happen */ if (!manufacturer[0] || !type[0] || !model[0]) - return -1; + return EINVAL; /* * Scan /proc/service_levels and return the CPU-MF counter facility @@ -133,14 +134,14 @@ skip_sysinfo: else nbytes = snprintf(buffer, sz, "%s,%s,%s", manufacturer, type, model); - return (nbytes >= sz) ? -1 : 0; + return (nbytes >= sz) ? ENOBUFS : 0; } char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused) { char *buf = malloc(128); - if (buf && get_cpuid(buf, 128) < 0) + if (buf && get_cpuid(buf, 128)) zfree(&buf); return buf; } diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index 44f5aba78210..7eb5621c021d 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -196,8 +196,10 @@ static int x86__annotate_init(struct arch *arch, char *cpuid) if (arch->initialized) return 0; - if (cpuid) - err = x86__cpuid_parse(arch, cpuid); + if (cpuid) { + if (x86__cpuid_parse(arch, cpuid)) + err = SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING; + } arch->initialized = true; return err; diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index 662ecf84a421..aa6deb463bf3 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <sys/types.h> +#include <errno.h> #include <unistd.h> #include <stdio.h> #include <stdlib.h> @@ -58,7 +59,7 @@ __get_cpuid(char *buffer, size_t sz, const char *fmt) buffer[nb-1] = '\0'; return 0; } - return -1; + return ENOBUFS; } int diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 3542b6ab9813..e69f44941aad 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2635,6 +2635,7 @@ static int build_cl_output(char *cl_sort, bool no_source) bool add_sym = false; bool add_dso = false; bool add_src = false; + int ret = 0; if (!buf) return -ENOMEM; @@ -2653,7 +2654,8 @@ static int build_cl_output(char *cl_sort, bool no_source) add_dso = true; } else if (strcmp(tok, "offset")) { pr_err("unrecognized sort token: %s\n", tok); - return -EINVAL; + ret = -EINVAL; + goto err; } } @@ -2676,13 +2678,15 @@ static int build_cl_output(char *cl_sort, bool no_source) add_sym ? "symbol," : "", add_dso ? "dso," : "", add_src ? "cl_srcline," : "", - "node") < 0) - return -ENOMEM; + "node") < 0) { + ret = -ENOMEM; + goto err; + } c2c.show_src = add_src; - +err: free(buf); - return 0; + return ret; } static int setup_coalesce(const char *coalesce, bool no_source) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 1e61e353f579..9661671cc26e 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -691,6 +691,7 @@ static char *compact_gfp_flags(char *gfp_flags) new = realloc(new_flags, len + strlen(cpt) + 2); if (new == NULL) { free(new_flags); + free(orig_flags); return NULL; } diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 2227e2f42c09..58a9e0989491 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -705,14 +705,15 @@ static int process_sample_event(struct perf_tool *tool, static int cpu_isa_config(struct perf_kvm_stat *kvm) { - char buf[64], *cpuid; + char buf[128], *cpuid; int err; if (kvm->live) { err = get_cpuid(buf, sizeof(buf)); if (err != 0) { - pr_err("Failed to look up CPU type\n"); - return err; + pr_err("Failed to look up CPU type: %s\n", + str_error_r(err, buf, sizeof(buf))); + return -err; } cpuid = buf; } else diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 286fc70d7402..67be8d31afab 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1063,7 +1063,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, continue; insn = 0; - for (off = 0;; off += ilen) { + for (off = 0; off < (unsigned)len; off += ilen) { uint64_t ip = start + off; printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp); @@ -1074,6 +1074,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += print_srccode(thread, x.cpumode, ip); break; } else { + ilen = 0; printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip, dump_insn(&x, ip, buffer + off, len - off, &ilen)); if (ilen == 0) @@ -1083,6 +1084,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, insn++; } } + if (off != (unsigned)len) + printed += fprintf(fp, "\tmismatch of LBR data and executable\n"); } /* @@ -1123,6 +1126,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, goto out; } for (off = 0; off <= end - start; off += ilen) { + ilen = 0; printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", start + off, dump_insn(&x, start + off, buffer + off, len - off, &ilen)); if (ilen == 0) diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index e2e0f06c97d0..cea13cb987d0 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -8,6 +8,7 @@ include/uapi/drm/i915_drm.h include/uapi/linux/fadvise.h include/uapi/linux/fcntl.h include/uapi/linux/fs.h +include/uapi/linux/fscrypt.h include/uapi/linux/kcmp.h include/uapi/linux/kvm.h include/uapi/linux/in.h diff --git a/tools/perf/jvmti/Build b/tools/perf/jvmti/Build index 1e148bbdf820..202cadaaf097 100644 --- a/tools/perf/jvmti/Build +++ b/tools/perf/jvmti/Build @@ -2,7 +2,7 @@ jvmti-y += libjvmti.o jvmti-y += jvmti_agent.o # For strlcpy -jvmti-y += libstring.o +jvmti-y += libstring.o libctype.o CFLAGS_jvmti = -fPIC -DPIC -I$(JDIR)/include -I$(JDIR)/include/linux CFLAGS_REMOVE_jvmti = -Wmissing-declarations @@ -15,3 +15,7 @@ CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PE $(OUTPUT)jvmti/libstring.o: ../lib/string.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) + +$(OUTPUT)jvmti/libctype.o: ../lib/ctype.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json b/tools/perf/pmu-events/arch/s390/cf_z15/basic.json index 17fb5241928b..17fb5241928b 100644 --- a/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/basic.json diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json index db286f19e7b6..db286f19e7b6 100644 --- a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json index 5e36bc2468d0..5e36bc2468d0 100644 --- a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json index 89e070727e1b..89e070727e1b 100644 --- a/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z15/transaction.json new file mode 100644 index 000000000000..1a0034f79f73 --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_z15/transaction.json @@ -0,0 +1,7 @@ +[ + { + "BriefDescription": "Transaction count", + "MetricName": "transaction", + "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL" + } +] diff --git a/tools/perf/pmu-events/arch/s390/mapfile.csv b/tools/perf/pmu-events/arch/s390/mapfile.csv index bd3fc577139c..61641a3480e0 100644 --- a/tools/perf/pmu-events/arch/s390/mapfile.csv +++ b/tools/perf/pmu-events/arch/s390/mapfile.csv @@ -4,4 +4,4 @@ Family-model,Version,Filename,EventType ^IBM.282[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_zec12,core ^IBM.296[45].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z13,core ^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core -^IBM.856[12].*3\.6.[[:xdigit:]]+$,3,cf_m8561,core +^IBM.856[12].*3\.6.[[:xdigit:]]+$,3,cf_z15,core diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 9e37287da924..e2837260ca4d 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -450,12 +450,12 @@ static struct fixed { const char *name; const char *event; } fixed[] = { - { "inst_retired.any", "event=0xc0" }, - { "inst_retired.any_p", "event=0xc0" }, - { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03" }, - { "cpu_clk_unhalted.thread", "event=0x3c" }, - { "cpu_clk_unhalted.core", "event=0x3c" }, - { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" }, + { "inst_retired.any", "event=0xc0,period=2000003" }, + { "inst_retired.any_p", "event=0xc0,period=2000003" }, + { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03,period=2000003" }, + { "cpu_clk_unhalted.thread", "event=0x3c,period=2000003" }, + { "cpu_clk_unhalted.core", "event=0x3c,period=2000003" }, + { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1,period=2000003" }, { NULL, NULL}, }; diff --git a/tools/perf/tests/perf-hooks.c b/tools/perf/tests/perf-hooks.c index dbc27199c65e..dd865e0bea12 100644 --- a/tools/perf/tests/perf-hooks.c +++ b/tools/perf/tests/perf-hooks.c @@ -19,12 +19,11 @@ static void sigsegv_handler(int sig __maybe_unused) static void the_hook(void *_hook_flags) { int *hook_flags = _hook_flags; - int *p = NULL; *hook_flags = 1234; /* Generate a segfault, test perf_hooks__recover */ - *p = 0; + raise(SIGSEGV); } int test__perf_hooks(struct test *test __maybe_unused, int subtest __maybe_unused) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e830eadfca2a..e42bf572358c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1631,6 +1631,19 @@ int symbol__strerror_disassemble(struct symbol *sym __maybe_unused, struct map * case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF: scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation"); break; + case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP: + scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions."); + break; + case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING: + scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization."); + break; + case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE: + scnprintf(buf, buflen, "Invalid BPF file: %s.", dso->long_name); + break; + case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF: + scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.", + dso->long_name); + break; default: scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum); break; @@ -1662,7 +1675,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil build_id_path = strdup(filename); if (!build_id_path) - return -1; + return ENOMEM; /* * old style build-id cache has name of XX/XXXXXXX.. while @@ -1713,13 +1726,13 @@ static int symbol__disassemble_bpf(struct symbol *sym, char tpath[PATH_MAX]; size_t buf_size; int nr_skip = 0; - int ret = -1; char *buf; bfd *bfdf; + int ret; FILE *s; if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO) - return -1; + return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE; pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__, sym->name, sym->start, sym->end - sym->start); @@ -1732,8 +1745,10 @@ static int symbol__disassemble_bpf(struct symbol *sym, assert(bfd_check_format(bfdf, bfd_object)); s = open_memstream(&buf, &buf_size); - if (!s) + if (!s) { + ret = errno; goto out; + } init_disassemble_info(&info, s, (fprintf_ftype) fprintf); @@ -1742,8 +1757,10 @@ static int symbol__disassemble_bpf(struct symbol *sym, info_node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id); - if (!info_node) + if (!info_node) { + ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF; goto out; + } info_linear = info_node->info_linear; sub_id = dso->bpf_prog.sub_id; @@ -2071,11 +2088,11 @@ int symbol__annotate(struct symbol *sym, struct map *map, int err; if (!arch_name) - return -1; + return errno; args.arch = arch = arch__find(arch_name); if (arch == NULL) - return -ENOTSUP; + return ENOTSUP; if (parch) *parch = arch; @@ -2971,7 +2988,7 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct evsel *evsel, notes->offsets = zalloc(size * sizeof(struct annotation_line *)); if (notes->offsets == NULL) - return -1; + return ENOMEM; if (perf_evsel__is_group_event(evsel)) nr_pcnt = evsel->core.nr_members; @@ -2997,7 +3014,7 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct evsel *evsel, out_free_offsets: zfree(¬es->offsets); - return -1; + return err; } #define ANNOTATION__CFG(n) \ diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index d94be9140e31..d76fd0e81f46 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -370,6 +370,10 @@ enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX = __SYMBOL_ANNOTATE_ERRNO__START, SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF, + SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING, + SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP, + SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE, + SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF, __SYMBOL_ANNOTATE_ERRNO__END, }; diff --git a/tools/perf/util/copyfile.c b/tools/perf/util/copyfile.c index 3fa0db136667..47e03de7c235 100644 --- a/tools/perf/util/copyfile.c +++ b/tools/perf/util/copyfile.c @@ -101,14 +101,16 @@ static int copyfile_mode_ns(const char *from, const char *to, mode_t mode, if (tofd < 0) goto out; - if (fchmod(tofd, mode)) - goto out_close_to; - if (st.st_size == 0) { /* /proc? do it slowly... */ err = slow_copyfile(from, tmp, nsi); + if (!err && fchmod(tofd, mode)) + err = -1; goto out_close_to; } + if (fchmod(tofd, mode)) + goto out_close_to; + nsinfo__mountns_enter(nsi, &nsc); fromfd = open(from, O_RDONLY); nsinfo__mountns_exit(&nsc); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d277a98e62df..de79c735e441 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1659,7 +1659,7 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list, is_open = false; if (c2->leader == leader) { if (is_open) - perf_evsel__close(&evsel->core); + perf_evsel__close(&c2->core); c2->leader = c2; c2->core.nr_members = 0; } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 5591af81a070..abc7fda4a0fe 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -30,6 +30,7 @@ #include "counts.h" #include "event.h" #include "evsel.h" +#include "util/env.h" #include "util/evsel_config.h" #include "util/evsel_fprintf.h" #include "evlist.h" @@ -2512,7 +2513,7 @@ struct perf_env *perf_evsel__env(struct evsel *evsel) { if (evsel && evsel->evlist) return evsel->evlist->env; - return NULL; + return &perf_env; } static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 86d9396cb131..becc2d109423 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1296,8 +1296,10 @@ static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp) continue; if (WARN_ONCE(cnt >= size, - "failed to write MEM_TOPOLOGY, way too many nodes\n")) + "failed to write MEM_TOPOLOGY, way too many nodes\n")) { + closedir(dir); return -1; + } ret = memory_node__read(&nodes[cnt++], idx); } diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 1bdf4c6ea3e5..e3ccb0ce1938 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -395,7 +395,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) size_t size; u16 idr_size; const char *sym; - uint32_t count; + uint64_t count; int ret, csize, usize; pid_t pid, tid; struct { @@ -418,7 +418,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) return -1; filename = event->mmap2.filename; - size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%u.so", + size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so", jd->dir, pid, count); @@ -529,7 +529,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) return -1; filename = event->mmap2.filename; - size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%"PRIu64, + size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so", jd->dir, pid, jr->move.code_index); diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 8d04e3d070b1..8b14e4a7f1dc 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -233,14 +233,14 @@ static int detect_kbuild_dir(char **kbuild_dir) const char *prefix_dir = ""; const char *suffix_dir = ""; + /* _UTSNAME_LENGTH is 65 */ + char release[128]; + char *autoconf_path; int err; if (!test_dir) { - /* _UTSNAME_LENGTH is 65 */ - char release[128]; - err = fetch_kernel_version(NULL, release, sizeof(release)); if (err) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 5b83ed1ebbd6..eec9b282c047 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "symbol.h" +#include <assert.h> #include <errno.h> #include <inttypes.h> #include <limits.h> @@ -850,6 +851,8 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp } after->start = map->end; + after->pgoff += map->end - pos->start; + assert(pos->map_ip(pos, map->end) == after->map_ip(after, map->end)); __map_groups__insert(pos->groups, after); if (verbose >= 2 && !use_browser) map__fprintf(after, fp); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 53f31053a27a..02460362256d 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -14,6 +14,7 @@ #include "thread_map.h" #include "trace-event.h" #include "mmap.h" +#include "util/env.h" #include <internal/lib.h> #include "../perf-sys.h" @@ -54,6 +55,11 @@ int parse_callchain_record(const char *arg __maybe_unused, } /* + * Add this one here not to drag util/env.c + */ +struct perf_env perf_env; + +/* * Support debug printing even though util/debug.c is not linked. That means * implementing 'verbose' and 'eprintf'. */ diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 5eda6e19c947..ae56c766eda1 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -154,8 +154,10 @@ static int rm_rf_depth_pat(const char *path, int depth, const char **pat) if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) continue; - if (!match_pat(d->d_name, pat)) - return -2; + if (!match_pat(d->d_name, pat)) { + ret = -2; + break; + } scnprintf(namebuf, sizeof(namebuf), "%s/%s", path, d->d_name); diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index 448d686da8b1..0bf5640f1f07 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -4,6 +4,7 @@ */ #ifndef __NFIT_TEST_H__ #define __NFIT_TEST_H__ +#include <linux/acpi.h> #include <linux/list.h> #include <linux/uuid.h> #include <linux/ioport.h> @@ -202,9 +203,6 @@ struct nd_intel_lss { __u32 status; } __packed; -union acpi_object; -typedef void *acpi_handle; - typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t); typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index c3feccb99ff5..4cdbae6f4e61 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -63,6 +63,13 @@ TARGETS += zram TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug +# User can optionally provide a TARGETS skiplist. +SKIP_TARGETS ?= +ifneq ($(SKIP_TARGETS),) + TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS)) + override TARGETS := $(TMP) +endif + # Clear LDFLAGS and MAKEFLAGS if called from main # Makefile to avoid test build failures when test # Makefile doesn't have explicit build rules. @@ -171,9 +178,12 @@ run_pstore_crash: # 1. output_dir=kernel_src # 2. a separate output directory is specified using O= KBUILD_OUTPUT # 3. a separate output directory is specified using KBUILD_OUTPUT +# Avoid conflict with INSTALL_PATH set by the main Makefile # -INSTALL_PATH ?= $(BUILD)/install -INSTALL_PATH := $(abspath $(INSTALL_PATH)) +KSFT_INSTALL_PATH ?= $(BUILD)/kselftest_install +KSFT_INSTALL_PATH := $(abspath $(KSFT_INSTALL_PATH)) +# Avoid changing the rest of the logic here and lib.mk. +INSTALL_PATH := $(KSFT_INSTALL_PATH) ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh install: all @@ -198,11 +208,16 @@ ifdef INSTALL_PATH echo " cat /dev/null > \$$logfile" >> $(ALL_SCRIPT) echo "fi" >> $(ALL_SCRIPT) + @# While building run_kselftest.sh skip also non-existent TARGET dirs: + @# they could be the result of a build failure and should NOT be + @# included in the generated runlist. for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ + [ ! -d $$INSTALL_PATH/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \ echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \ echo "cd $$TARGET" >> $(ALL_SCRIPT); \ echo -n "run_many" >> $(ALL_SCRIPT); \ + echo -n "Emit Tests for $$TARGET\n"; \ $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \ echo "" >> $(ALL_SCRIPT); \ echo "cd \$$ROOT" >> $(ALL_SCRIPT); \ diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c index 6cbeea7b4bf1..8547ecbdc61f 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c @@ -195,7 +195,7 @@ static void run_test(int cgroup_fd) if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread, (void *)&server_fd))) - goto close_bpf_object; + goto close_server_fd; pthread_mutex_lock(&server_started_mtx); pthread_cond_wait(&server_started, &server_started_mtx); diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index fdc0b3614a9e..f4cd60d6fba2 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -203,14 +203,24 @@ static int start_server(void) return fd; } +static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER; + static void *server_thread(void *arg) { struct sockaddr_storage addr; socklen_t len = sizeof(addr); int fd = *(int *)arg; int client_fd; + int err; + + err = listen(fd, 1); - if (CHECK_FAIL(listen(fd, 1)) < 0) { + pthread_mutex_lock(&server_started_mtx); + pthread_cond_signal(&server_started); + pthread_mutex_unlock(&server_started_mtx); + + if (CHECK_FAIL(err < 0)) { perror("Failed to listed on socket"); return NULL; } @@ -248,8 +258,16 @@ void test_tcp_rtt(void) if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; - pthread_create(&tid, NULL, server_thread, (void *)&server_fd); + if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread, + (void *)&server_fd))) + goto close_server_fd; + + pthread_mutex_lock(&server_started_mtx); + pthread_cond_wait(&server_started, &server_started_mtx); + pthread_mutex_unlock(&server_started_mtx); + CHECK_FAIL(run_test(cgroup_fd, server_fd)); +close_server_fd: close(server_fd); close_cgroup_fd: close(cgroup_fd); diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index 8a399bdfd920..067eb625d01c 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -413,7 +413,10 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, #else #pragma unroll #endif - for (int i = 0; i < STROBE_MAX_MAP_ENTRIES && i < map.cnt; ++i) { + for (int i = 0; i < STROBE_MAX_MAP_ENTRIES; ++i) { + if (i >= map.cnt) + break; + descr->key_lens[i] = 0; len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, map.entries[i].key); diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index d23d4da66b83..e2d06191bd35 100755 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -63,6 +63,9 @@ fi # Setup tc qdisc add dev lo ingress +echo 0 > /proc/sys/net/ipv4/conf/default/rp_filter +echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter +echo 0 > /proc/sys/net/ipv4/conf/lo/rp_filter echo "Testing IPv4..." # Drops all IP/UDP packets coming from port 9 diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh index acf7a74f97cd..59ea56945e6c 100755 --- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh @@ -314,15 +314,15 @@ test_gso() command -v nc >/dev/null 2>&1 || \ { echo >&2 "nc is not available: skipping TSO tests"; return; } - # listen on IPv*_DST, capture TCP into $TMPFILE + # listen on port 9000, capture TCP into $TMPFILE if [ "${PROTO}" == "IPv4" ] ; then IP_DST=${IPv4_DST} ip netns exec ${NS3} bash -c \ - "nc -4 -l -s ${IPv4_DST} -p 9000 > ${TMPFILE} &" + "nc -4 -l -p 9000 > ${TMPFILE} &" elif [ "${PROTO}" == "IPv6" ] ; then IP_DST=${IPv6_DST} ip netns exec ${NS3} bash -c \ - "nc -6 -l -s ${IPv6_DST} -p 9000 > ${TMPFILE} &" + "nc -6 -l -p 9000 > ${TMPFILE} &" RET=$? else echo " test_gso: unknown PROTO: ${PROTO}" diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 15a666329a34..1afa22c88e42 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -22,6 +22,7 @@ import os import pprint import random import re +import stat import string import struct import subprocess @@ -311,7 +312,11 @@ class DebugfsDir: for f in out.split(): if f == "ports": continue + p = os.path.join(path, f) + if not os.stat(p).st_mode & stat.S_IRUSR: + continue + if os.path.isfile(p): _, out = cmd('cat %s/%s' % (path, f)) dfs[f] = out.strip() diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c index 4f8ec1f10a80..a320e3844b17 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -1385,7 +1385,6 @@ static int fixup_sysctl_value(const char *buf, size_t buf_len, uint8_t raw[sizeof(uint64_t)]; uint64_t num; } value = {}; - uint8_t c, i; if (buf_len > sizeof(value)) { log_err("Value is too big (%zd) to use in fixup", buf_len); diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh index f38567ef694b..daa7d1b8d309 100755 --- a/tools/testing/selftests/bpf/test_tc_edt.sh +++ b/tools/testing/selftests/bpf/test_tc_edt.sh @@ -59,7 +59,7 @@ ip netns exec ${NS_SRC} tc filter add dev veth_src egress \ # start the listener ip netns exec ${NS_DST} bash -c \ - "nc -4 -l -s ${IP_DST} -p 9000 >/dev/null &" + "nc -4 -l -p 9000 >/dev/null &" declare -i NC_PID=$! sleep 1 diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh index 5dcdfa20fc6c..126caf28b529 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh @@ -224,13 +224,6 @@ ingress_vlan_filter_test() local vid=10 bridge vlan add vid $vid dev $swp2 master - # During initialization the firmware enables all the VLAN filters and - # the driver does not turn them off since the traffic will be discarded - # by the STP filter whose default is DISCARD state. Add the VID on the - # ingress bridge port and then remove it to make sure it is not member - # in the VLAN. - bridge vlan add vid $vid dev $swp1 master - bridge vlan del vid $vid dev $swp1 master RET=0 diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index 8a4025e912cb..ef1e9bafb098 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -95,7 +95,7 @@ echo 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events check_error 'p:kprobes/testevent _do_fork ^bcd=\1' # DIFF_ARG_TYPE check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8' # DIFF_ARG_TYPE check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"' # DIFF_ARG_TYPE -check_error '^p:kprobes/testevent _do_fork' # SAME_PROBE +check_error '^p:kprobes/testevent _do_fork abcd=\1' # SAME_PROBE fi exit 0 diff --git a/tools/testing/selftests/kexec/test_kexec_file_load.sh b/tools/testing/selftests/kexec/test_kexec_file_load.sh index fa7c24e8eefb..2ff600388c30 100755 --- a/tools/testing/selftests/kexec/test_kexec_file_load.sh +++ b/tools/testing/selftests/kexec/test_kexec_file_load.sh @@ -37,11 +37,20 @@ is_ima_sig_required() # sequentially. As a result, a policy rule may be defined, but # might not necessarily be used. This test assumes if a policy # rule is specified, that is the intent. + + # First check for appended signature (modsig), then xattr if [ $ima_read_policy -eq 1 ]; then check_ima_policy "appraise" "func=KEXEC_KERNEL_CHECK" \ - "appraise_type=imasig" + "appraise_type=imasig|modsig" ret=$? - [ $ret -eq 1 ] && log_info "IMA signature required"; + if [ $ret -eq 1 ]; then + log_info "IMA or appended(modsig) signature required" + else + check_ima_policy "appraise" "func=KEXEC_KERNEL_CHECK" \ + "appraise_type=imasig" + ret=$? + [ $ret -eq 1 ] && log_info "IMA signature required"; + fi fi return $ret } @@ -84,6 +93,22 @@ check_for_imasig() return $ret } +# Return 1 for appended signature (modsig) found and 0 for not found. +check_for_modsig() +{ + local module_sig_string="~Module signature appended~" + local sig="$(tail --bytes $((${#module_sig_string} + 1)) $KERNEL_IMAGE)" + local ret=0 + + if [ "$sig" == "$module_sig_string" ]; then + ret=1 + log_info "kexec kernel image modsig signed" + else + log_info "kexec kernel image not modsig signed" + fi + return $ret +} + kexec_file_load_test() { local succeed_msg="kexec_file_load succeeded" @@ -98,7 +123,8 @@ kexec_file_load_test() # In secureboot mode with an architecture specific # policy, make sure either an IMA or PE signature exists. if [ $secureboot -eq 1 ] && [ $arch_policy -eq 1 ] && \ - [ $ima_signed -eq 0 ] && [ $pe_signed -eq 0 ]; then + [ $ima_signed -eq 0 ] && [ $pe_signed -eq 0 ] \ + && [ $ima_modsig -eq 0 ]; then log_fail "$succeed_msg (missing sig)" fi @@ -107,7 +133,8 @@ kexec_file_load_test() log_fail "$succeed_msg (missing PE sig)" fi - if [ $ima_sig_required -eq 1 ] && [ $ima_signed -eq 0 ]; then + if [ $ima_sig_required -eq 1 ] && [ $ima_signed -eq 0 ] \ + && [ $ima_modsig -eq 0 ]; then log_fail "$succeed_msg (missing IMA sig)" fi @@ -204,5 +231,8 @@ pe_signed=$? check_for_imasig ima_signed=$? +check_for_modsig +ima_modsig=$? + # Test loading the kernel image via kexec_file_load syscall kexec_file_load_test diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index 00c9020bdda8..84de7bc74f2c 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -3,9 +3,14 @@ # # Runs a set of tests in a given subdirectory. export skip_rc=4 +export timeout_rc=124 export logfile=/dev/stdout export per_test_logging= +# Defaults for "settings" file fields: +# "timeout" how many seconds to let each test run before failing. +export kselftest_default_timeout=45 + # There isn't a shell-agnostic way to find the path of a sourced file, # so we must rely on BASE_DIR being set to find other tools. if [ -z "$BASE_DIR" ]; then @@ -24,6 +29,16 @@ tap_prefix() fi } +tap_timeout() +{ + # Make sure tests will time out if utility is available. + if [ -x /usr/bin/timeout ] ; then + /usr/bin/timeout "$kselftest_timeout" "$1" + else + "$1" + fi +} + run_one() { DIR="$1" @@ -32,6 +47,18 @@ run_one() BASENAME_TEST=$(basename $TEST) + # Reset any "settings"-file variables. + export kselftest_timeout="$kselftest_default_timeout" + # Load per-test-directory kselftest "settings" file. + settings="$BASE_DIR/$DIR/settings" + if [ -r "$settings" ] ; then + while read line ; do + field=$(echo "$line" | cut -d= -f1) + value=$(echo "$line" | cut -d= -f2-) + eval "kselftest_$field"="$value" + done < "$settings" + fi + TEST_HDR_MSG="selftests: $DIR: $BASENAME_TEST" echo "# $TEST_HDR_MSG" if [ ! -x "$TEST" ]; then @@ -44,14 +71,17 @@ run_one() echo "not ok $test_num $TEST_HDR_MSG" else cd `dirname $TEST` > /dev/null - (((((./$BASENAME_TEST 2>&1; echo $? >&3) | + ((((( tap_timeout ./$BASENAME_TEST 2>&1; echo $? >&3) | tap_prefix >&4) 3>&1) | (read xs; exit $xs)) 4>>"$logfile" && echo "ok $test_num $TEST_HDR_MSG") || - (if [ $? -eq $skip_rc ]; then \ + (rc=$?; \ + if [ $rc -eq $skip_rc ]; then \ echo "not ok $test_num $TEST_HDR_MSG # SKIP" + elif [ $rc -eq $timeout_rc ]; then \ + echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT" else - echo "not ok $test_num $TEST_HDR_MSG" + echo "not ok $test_num $TEST_HDR_MSG # exit=$rc" fi) cd - >/dev/null fi diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh index ec304463883c..e2e1911d62d5 100755 --- a/tools/testing/selftests/kselftest_install.sh +++ b/tools/testing/selftests/kselftest_install.sh @@ -24,12 +24,12 @@ main() echo "$0: Installing in specified location - $install_loc ..." fi - install_dir=$install_loc/kselftest + install_dir=$install_loc/kselftest_install # Create install directory mkdir -p $install_dir # Build tests - INSTALL_PATH=$install_dir make install + KSFT_INSTALL_PATH=$install_dir make install } main "$@" diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index b35da375530a..409c1fa75e03 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,4 +1,5 @@ /s390x/sync_regs_test +/s390x/memop /x86_64/cr4_cpuid_sync_test /x86_64/evmcs_test /x86_64/hyperv_cpuid @@ -9,6 +10,7 @@ /x86_64/state_test /x86_64/sync_regs_test /x86_64/vmx_close_while_nested_test +/x86_64/vmx_dirty_log_test /x86_64/vmx_set_nested_state_test /x86_64/vmx_tsc_adjust_test /clear_dirty_log_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 62c591f87dab..c5ec868fa1e5 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -22,6 +22,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += clear_dirty_log_test @@ -48,7 +49,7 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I.. no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ - $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) + $(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie) # On s390, build the testcases KVM-enabled pgste-option = $(call try-run, echo 'int main() { return 0; }' | \ diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index dc3346e090f5..5614222a6628 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -19,8 +19,6 @@ #include "kvm_util.h" #include "processor.h" -#define DEBUG printf - #define VCPU_ID 1 /* The memory slot index to track dirty pages */ @@ -249,14 +247,12 @@ static void vm_dirty_log_verify(unsigned long *bmap) } static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, - uint64_t extra_mem_pages, void *guest_code, - unsigned long type) + uint64_t extra_mem_pages, void *guest_code) { struct kvm_vm *vm; uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; - vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, - O_RDWR, type); + vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); #ifdef __x86_64__ vm_create_irqchip(vm); @@ -265,67 +261,35 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, return vm; } +#define DIRTY_MEM_BITS 30 /* 1G */ +#define PAGE_SHIFT_4K 12 + static void run_test(enum vm_guest_mode mode, unsigned long iterations, unsigned long interval, uint64_t phys_offset) { - unsigned int guest_pa_bits, guest_page_shift; pthread_t vcpu_thread; struct kvm_vm *vm; - uint64_t max_gfn; unsigned long *bmap; - unsigned long type = 0; - - switch (mode) { - case VM_MODE_P52V48_4K: - guest_pa_bits = 52; - guest_page_shift = 12; - break; - case VM_MODE_P52V48_64K: - guest_pa_bits = 52; - guest_page_shift = 16; - break; - case VM_MODE_P48V48_4K: - guest_pa_bits = 48; - guest_page_shift = 12; - break; - case VM_MODE_P48V48_64K: - guest_pa_bits = 48; - guest_page_shift = 16; - break; - case VM_MODE_P40V48_4K: - guest_pa_bits = 40; - guest_page_shift = 12; - break; - case VM_MODE_P40V48_64K: - guest_pa_bits = 40; - guest_page_shift = 16; - break; - default: - TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); - } - DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - -#ifdef __x86_64__ /* - * FIXME - * The x86_64 kvm selftests framework currently only supports a - * single PML4 which restricts the number of physical address - * bits we can change to 39. + * We reserve page table for 2 times of extra dirty mem which + * will definitely cover the original (1G+) test range. Here + * we do the calculation with 4K page size which is the + * smallest so the page number will be enough for all archs + * (e.g., 64K page size guest will need even less memory for + * page tables). */ - guest_pa_bits = 39; -#endif -#ifdef __aarch64__ - if (guest_pa_bits != 40) - type = KVM_VM_TYPE_ARM_IPA_SIZE(guest_pa_bits); -#endif - max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1; - guest_page_size = (1ul << guest_page_shift); + vm = create_vm(mode, VCPU_ID, + 2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K), + guest_code); + + guest_page_size = vm_get_page_size(vm); /* * A little more than 1G of guest page sized pages. Cover the * case where the size is not aligned to 64 pages. */ - guest_num_pages = (1ul << (30 - guest_page_shift)) + 16; + guest_num_pages = (1ul << (DIRTY_MEM_BITS - + vm_get_page_shift(vm))) + 16; #ifdef __s390x__ /* Round up to multiple of 1M (segment size) */ guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL; @@ -335,7 +299,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, !!((guest_num_pages * guest_page_size) % host_page_size); if (!phys_offset) { - guest_test_phys_mem = (max_gfn - guest_num_pages) * guest_page_size; + guest_test_phys_mem = (vm_get_max_gfn(vm) - + guest_num_pages) * guest_page_size; guest_test_phys_mem &= ~(host_page_size - 1); } else { guest_test_phys_mem = phys_offset; @@ -351,8 +316,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, bmap = bitmap_alloc(host_num_pages); host_bmap_track = bitmap_alloc(host_num_pages); - vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code, type); - #ifdef USE_CLEAR_DIRTY_LOG struct kvm_enable_cap cap = {}; @@ -482,7 +445,7 @@ int main(int argc, char *argv[]) #endif #ifdef __x86_64__ - vm_guest_mode_params_init(VM_MODE_P52V48_4K, true, true); + vm_guest_mode_params_init(VM_MODE_PXXV48_4K, true, true); #endif #ifdef __aarch64__ vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 5463b7896a0a..29cccaf96baf 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -24,6 +24,12 @@ struct kvm_vm; typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ +#ifndef NDEBUG +#define DEBUG(...) printf(__VA_ARGS__); +#else +#define DEBUG(...) +#endif + /* Minimum allocated guest virtual and physical addresses */ #define KVM_UTIL_MIN_VADDR 0x2000 @@ -38,11 +44,14 @@ enum vm_guest_mode { VM_MODE_P48V48_64K, VM_MODE_P40V48_4K, VM_MODE_P40V48_64K, + VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ NUM_VM_MODES, }; -#ifdef __aarch64__ +#if defined(__aarch64__) #define VM_MODE_DEFAULT VM_MODE_P40V48_4K +#elif defined(__x86_64__) +#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K #else #define VM_MODE_DEFAULT VM_MODE_P52V48_4K #endif @@ -60,8 +69,7 @@ int kvm_check_cap(long cap); int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, - int perm, unsigned long type); +struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp, int perm); void kvm_vm_release(struct kvm_vm *vmp); @@ -146,6 +154,10 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); bool vm_is_unrestricted_guest(struct kvm_vm *vm); +unsigned int vm_get_page_size(struct kvm_vm *vm); +unsigned int vm_get_page_shift(struct kvm_vm *vm); +unsigned int vm_get_max_gfn(struct kvm_vm *vm); + struct kvm_userspace_memory_region * kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end); diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 80d19740d2dc..ff234018219c 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -325,6 +325,9 @@ uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index); void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, uint64_t msr_value); +uint32_t kvm_get_cpuid_max(void); +void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); + /* * Basic CPU control in CR0 */ @@ -1080,6 +1083,9 @@ void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, #define VMX_BASIC_MEM_TYPE_WB 6LLU #define VMX_BASIC_INOUT 0x0040000000000000LLU +/* VMX_EPT_VPID_CAP bits */ +#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21) + /* MSR_IA32_VMX_MISC bits */ #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 69b17055f63d..f52e0ba84fed 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -569,6 +569,10 @@ struct vmx_pages { void *enlightened_vmcs_hva; uint64_t enlightened_vmcs_gpa; void *enlightened_vmcs; + + void *eptp_hva; + uint64_t eptp_gpa; + void *eptp; }; struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva); @@ -576,4 +580,16 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx); void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp); bool load_vmcs(struct vmx_pages *vmx); +void nested_vmx_check_supported(void); + +void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot); +void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint64_t size, + uint32_t eptp_memslot); +void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t memslot, uint32_t eptp_memslot); +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot); + #endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 486400a97374..86036a59a668 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -264,6 +264,9 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini case VM_MODE_P52V48_4K: TEST_ASSERT(false, "AArch64 does not support 4K sized pages " "with 52-bit physical address ranges"); + case VM_MODE_PXXV48_4K: + TEST_ASSERT(false, "AArch64 does not support 4K sized pages " + "with ANY-bit physical address ranges"); case VM_MODE_P52V48_64K: tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 6e49bb039376..41cf45416060 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -8,6 +8,7 @@ #include "test_util.h" #include "kvm_util.h" #include "kvm_util_internal.h" +#include "processor.h" #include <assert.h> #include <sys/mman.h> @@ -84,7 +85,7 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap) return ret; } -static void vm_open(struct kvm_vm *vm, int perm, unsigned long type) +static void vm_open(struct kvm_vm *vm, int perm) { vm->kvm_fd = open(KVM_DEV_PATH, perm); if (vm->kvm_fd < 0) @@ -95,18 +96,19 @@ static void vm_open(struct kvm_vm *vm, int perm, unsigned long type) exit(KSFT_SKIP); } - vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, type); + vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type); TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, " "rc: %i errno: %i", vm->fd, errno); } const char * const vm_guest_mode_string[] = { - "PA-bits:52, VA-bits:48, 4K pages", - "PA-bits:52, VA-bits:48, 64K pages", - "PA-bits:48, VA-bits:48, 4K pages", - "PA-bits:48, VA-bits:48, 64K pages", - "PA-bits:40, VA-bits:48, 4K pages", - "PA-bits:40, VA-bits:48, 64K pages", + "PA-bits:52, VA-bits:48, 4K pages", + "PA-bits:52, VA-bits:48, 64K pages", + "PA-bits:48, VA-bits:48, 4K pages", + "PA-bits:48, VA-bits:48, 64K pages", + "PA-bits:40, VA-bits:48, 4K pages", + "PA-bits:40, VA-bits:48, 64K pages", + "PA-bits:ANY, VA-bits:48, 4K pages", }; _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); @@ -130,17 +132,17 @@ _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, * descriptor to control the created VM is created with the permissions * given by perm (e.g. O_RDWR). */ -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, - int perm, unsigned long type) +struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) { struct kvm_vm *vm; + DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + vm = calloc(1, sizeof(*vm)); TEST_ASSERT(vm != NULL, "Insufficient Memory"); vm->mode = mode; - vm->type = type; - vm_open(vm, perm, type); + vm->type = 0; /* Setup mode specific traits. */ switch (vm->mode) { @@ -186,10 +188,32 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, vm->page_size = 0x10000; vm->page_shift = 16; break; + case VM_MODE_PXXV48_4K: +#ifdef __x86_64__ + kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits); + TEST_ASSERT(vm->va_bits == 48, "Linear address width " + "(%d bits) not supported", vm->va_bits); + vm->pgtable_levels = 4; + vm->page_size = 0x1000; + vm->page_shift = 12; + DEBUG("Guest physical address width detected: %d\n", + vm->pa_bits); +#else + TEST_ASSERT(false, "VM_MODE_PXXV48_4K not supported on " + "non-x86 platforms"); +#endif + break; default: TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); } +#ifdef __aarch64__ + if (vm->pa_bits != 40) + vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits); +#endif + + vm_open(vm, perm); + /* Limit to VA-bit canonical virtual addresses. */ vm->vpages_valid = sparsebit_alloc(); sparsebit_set_num(vm->vpages_valid, @@ -212,7 +236,7 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) { - return _vm_create(mode, phy_pages, perm, 0); + return _vm_create(mode, phy_pages, perm); } /* @@ -232,7 +256,7 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm) { struct userspace_mem_region *region; - vm_open(vmp, perm, vmp->type); + vm_open(vmp, perm); if (vmp->has_irqchip) vm_create_irqchip(vmp); @@ -681,7 +705,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, * on error (e.g. currently no memory region using memslot as a KVM * memory slot ID). */ -static struct userspace_mem_region * +struct userspace_mem_region * memslot2region(struct kvm_vm *vm, uint32_t memslot) { struct userspace_mem_region *region; @@ -1628,3 +1652,18 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm) return val == 'Y'; } + +unsigned int vm_get_page_size(struct kvm_vm *vm) +{ + return vm->page_size; +} + +unsigned int vm_get_page_shift(struct kvm_vm *vm) +{ + return vm->page_shift; +} + +unsigned int vm_get_max_gfn(struct kvm_vm *vm) +{ + return vm->max_gfn; +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index f36262e0f655..ac50c42750cf 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -68,4 +68,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent); void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent); +struct userspace_mem_region * +memslot2region(struct kvm_vm *vm, uint32_t memslot); + #endif /* SELFTEST_KVM_UTIL_INTERNAL_H */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 0a5e487dbc50..6698cb741e10 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -228,7 +228,7 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs, void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) { - TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); /* If needed, create page map l4 table. */ @@ -261,7 +261,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint16_t index[4]; struct pageMapL4Entry *pml4e; - TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); TEST_ASSERT((vaddr % vm->page_size) == 0, @@ -547,7 +547,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) struct pageDirectoryEntry *pde; struct pageTableEntry *pte; - TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); index[0] = (gva >> 12) & 0x1ffu; @@ -621,7 +621,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot); switch (vm->mode) { - case VM_MODE_P52V48_4K: + case VM_MODE_PXXV48_4K: sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); @@ -1085,7 +1085,7 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) for (i = 0; i < nmsrs; i++) state->msrs.entries[i].index = list->indices[i]; r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs); - TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)", + TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)", r, r == nmsrs ? -1 : list->indices[r]); r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs); @@ -1157,3 +1157,25 @@ bool is_intel_cpu(void) chunk = (const uint32_t *)("GenuineIntel"); return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]); } + +uint32_t kvm_get_cpuid_max(void) +{ + return kvm_get_supported_cpuid_entry(0x80000000)->eax; +} + +void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) +{ + struct kvm_cpuid_entry2 *entry; + bool pae; + + /* SDM 4.1.4 */ + if (kvm_get_cpuid_max() < 0x80000008) { + pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6); + *pa_bits = pae ? 36 : 32; + *va_bits = 32; + } else { + entry = kvm_get_supported_cpuid_entry(0x80000008); + *pa_bits = entry->eax & 0xff; + *va_bits = (entry->eax >> 8) & 0xff; + } +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index 4bfc9a90b1de..da4d89ad5419 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -32,7 +32,7 @@ void ucall(uint64_t cmd, int nargs, ...) va_end(va); asm volatile("in %[port], %%al" - : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax"); + : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory"); } uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 9cef0455b819..f6ec97b7eaef 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -7,11 +7,39 @@ #include "test_util.h" #include "kvm_util.h" +#include "../kvm_util_internal.h" #include "processor.h" #include "vmx.h" +#define PAGE_SHIFT_4K 12 + +#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000 + bool enable_evmcs; +struct eptPageTableEntry { + uint64_t readable:1; + uint64_t writable:1; + uint64_t executable:1; + uint64_t memory_type:3; + uint64_t ignore_pat:1; + uint64_t page_size:1; + uint64_t accessed:1; + uint64_t dirty:1; + uint64_t ignored_11_10:2; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t suppress_ve:1; +}; + +struct eptPageTablePointer { + uint64_t memory_type:3; + uint64_t page_walk_length:3; + uint64_t ad_enabled:1; + uint64_t reserved_11_07:5; + uint64_t address:40; + uint64_t reserved_63_52:12; +}; int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id) { uint16_t evmcs_ver; @@ -174,15 +202,35 @@ bool load_vmcs(struct vmx_pages *vmx) */ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) { + uint32_t sec_exec_ctl = 0; + vmwrite(VIRTUAL_PROCESSOR_ID, 0); vmwrite(POSTED_INTR_NV, 0); vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS)); - if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, 0)) + + if (vmx->eptp_gpa) { + uint64_t ept_paddr; + struct eptPageTablePointer eptp = { + .memory_type = VMX_BASIC_MEM_TYPE_WB, + .page_walk_length = 3, /* + 1 */ + .ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS), + .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, + }; + + memcpy(&ept_paddr, &eptp, sizeof(ept_paddr)); + vmwrite(EPT_POINTER, ept_paddr); + sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT; + } + + if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl)) vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); - else + else { vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS)); + GUEST_ASSERT(!sec_exec_ctl); + } + vmwrite(EXCEPTION_BITMAP, 0); vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ @@ -327,3 +375,162 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) init_vmcs_host_state(); init_vmcs_guest_state(guest_rip, guest_rsp); } + +void nested_vmx_check_supported(void) +{ + struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + + if (!(entry->ecx & CPUID_VMX)) { + fprintf(stderr, "nested VMX not enabled, skipping test\n"); + exit(KSFT_SKIP); + } +} + +void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot) +{ + uint16_t index[4]; + struct eptPageTableEntry *pml4e; + + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + TEST_ASSERT((nested_paddr % vm->page_size) == 0, + "Nested physical address not on page boundary,\n" + " nested_paddr: 0x%lx vm->page_size: 0x%x", + nested_paddr, vm->page_size); + TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", + paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + index[0] = (nested_paddr >> 12) & 0x1ffu; + index[1] = (nested_paddr >> 21) & 0x1ffu; + index[2] = (nested_paddr >> 30) & 0x1ffu; + index[3] = (nested_paddr >> 39) & 0x1ffu; + + /* Allocate page directory pointer table if not present. */ + pml4e = vmx->eptp_hva; + if (!pml4e[index[3]].readable) { + pml4e[index[3]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pml4e[index[3]].writable = true; + pml4e[index[3]].readable = true; + pml4e[index[3]].executable = true; + } + + /* Allocate page directory table if not present. */ + struct eptPageTableEntry *pdpe; + pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); + if (!pdpe[index[2]].readable) { + pdpe[index[2]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pdpe[index[2]].writable = true; + pdpe[index[2]].readable = true; + pdpe[index[2]].executable = true; + } + + /* Allocate page table if not present. */ + struct eptPageTableEntry *pde; + pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); + if (!pde[index[1]].readable) { + pde[index[1]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pde[index[1]].writable = true; + pde[index[1]].readable = true; + pde[index[1]].executable = true; + } + + /* Fill in page table entry. */ + struct eptPageTableEntry *pte; + pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); + pte[index[0]].address = paddr >> vm->page_shift; + pte[index[0]].writable = true; + pte[index[0]].readable = true; + pte[index[0]].executable = true; + + /* + * For now mark these as accessed and dirty because the only + * testcase we have needs that. Can be reconsidered later. + */ + pte[index[0]].accessed = true; + pte[index[0]].dirty = true; +} + +/* + * Map a range of EPT guest physical addresses to the VM's physical address + * + * Input Args: + * vm - Virtual Machine + * nested_paddr - Nested guest physical address to map + * paddr - VM Physical Address + * size - The size of the range to map + * eptp_memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within the VM given by vm, creates a nested guest translation for the + * page range starting at nested_paddr to the page range starting at paddr. + */ +void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint64_t size, + uint32_t eptp_memslot) +{ + size_t page_size = vm->page_size; + size_t npages = size / page_size; + + TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); + TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); + + while (npages--) { + nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot); + nested_paddr += page_size; + paddr += page_size; + } +} + +/* Prepare an identity extended page table that maps all the + * physical pages in VM. + */ +void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t memslot, uint32_t eptp_memslot) +{ + sparsebit_idx_t i, last; + struct userspace_mem_region *region = + memslot2region(vm, memslot); + + i = (region->region.guest_phys_addr >> vm->page_shift) - 1; + last = i + (region->region.memory_size >> vm->page_shift); + for (;;) { + i = sparsebit_next_clear(region->unused_phy_pages, i); + if (i > last) + break; + + nested_map(vmx, vm, + (uint64_t)i << vm->page_shift, + (uint64_t)i << vm->page_shift, + 1 << vm->page_shift, + eptp_memslot); + } +} + +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot) +{ + vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); + vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); +} diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index ee59831fbc98..443a2b54645b 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -26,6 +26,25 @@ static void guest_code(void) { } +static int smt_possible(void) +{ + char buf[16]; + FILE *f; + bool res = 1; + + f = fopen("/sys/devices/system/cpu/smt/control", "r"); + if (f) { + if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) { + if (!strncmp(buf, "forceoff", 8) || + !strncmp(buf, "notsupported", 12)) + res = 0; + } + fclose(f); + } + + return res; +} + static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, int evmcs_enabled) { @@ -59,6 +78,14 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, TEST_ASSERT(!entry->padding[0] && !entry->padding[1] && !entry->padding[2], "padding should be zero"); + if (entry->function == 0x40000004) { + int nononarchcs = !!(entry->eax & (1UL << 18)); + + TEST_ASSERT(nononarchcs == !smt_possible(), + "NoNonArchitecturalCoreSharing bit" + " doesn't reflect SMT setting"); + } + /* * If needed for debug: * fprintf(stdout, diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index 11c2a70a7b87..5c8224256294 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -22,18 +22,19 @@ #define VCPU_ID 5 +#define UCALL_PIO_PORT ((uint16_t)0x1000) + +/* + * ucall is embedded here to protect against compiler reshuffling registers + * before calling a function. In this test we only need to get KVM_EXIT_IO + * vmexit and preserve RBX, no additional information is needed. + */ void guest_code(void) { - /* - * use a callee-save register, otherwise the compiler - * saves it around the call to GUEST_SYNC. - */ - register u32 stage asm("rbx"); - for (;;) { - GUEST_SYNC(0); - stage++; - asm volatile ("" : : "r" (stage)); - } + asm volatile("1: in %[port], %%al\n" + "add $0x1, %%rbx\n" + "jmp 1b" + : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx"); } static void compare_regs(struct kvm_regs *left, struct kvm_regs *right) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c index 3b0ffe01dacd..5dfb53546a26 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c @@ -53,12 +53,8 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { vm_vaddr_t vmx_pages_gva; - struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); - if (!(entry->ecx & CPUID_VMX)) { - fprintf(stderr, "nested VMX not enabled, skipping test\n"); - exit(KSFT_SKIP); - } + nested_vmx_check_supported(); vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c new file mode 100644 index 000000000000..a223a6401258 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty page logging test + * + * Copyright (C) 2018, Red Hat, Inc. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include <stdio.h> +#include <stdlib.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#define VCPU_ID 1 + +/* The memory slot index to track dirty pages */ +#define TEST_MEM_SLOT_INDEX 1 +#define TEST_MEM_SIZE 3 + +/* L1 guest test virtual memory offset */ +#define GUEST_TEST_MEM 0xc0000000 + +/* L2 guest test virtual memory offset */ +#define NESTED_TEST_MEM1 0xc0001000 +#define NESTED_TEST_MEM2 0xc0002000 + +static void l2_guest_code(void) +{ + *(volatile uint64_t *)NESTED_TEST_MEM1; + *(volatile uint64_t *)NESTED_TEST_MEM1 = 1; + GUEST_SYNC(true); + GUEST_SYNC(false); + + *(volatile uint64_t *)NESTED_TEST_MEM2 = 1; + GUEST_SYNC(true); + *(volatile uint64_t *)NESTED_TEST_MEM2 = 1; + GUEST_SYNC(true); + GUEST_SYNC(false); + + /* Exit to L1 and never come back. */ + vmcall(); +} + +void l1_guest_code(struct vmx_pages *vmx) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT(vmx->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx)); + GUEST_ASSERT(load_vmcs(vmx)); + + prepare_vmcs(vmx, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(false); + GUEST_ASSERT(!vmlaunch()); + GUEST_SYNC(false); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva = 0; + struct vmx_pages *vmx; + unsigned long *bmap; + uint64_t *host_test_mem; + + struct kvm_vm *vm; + struct kvm_run *run; + struct ucall uc; + bool done = false; + + nested_vmx_check_supported(); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, l1_guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + run = vcpu_state(vm, VCPU_ID); + + /* Add an extra memory slot for testing dirty logging */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + GUEST_TEST_MEM, + TEST_MEM_SLOT_INDEX, + TEST_MEM_SIZE, + KVM_MEM_LOG_DIRTY_PAGES); + + /* + * Add an identity map for GVA range [0xc0000000, 0xc0002000). This + * affects both L1 and L2. However... + */ + virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, + TEST_MEM_SIZE * 4096, 0); + + /* + * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to + * 0xc0000000. + * + * Note that prepare_eptp should be called only L1's GPA map is done, + * meaning after the last call to virt_map. + */ + prepare_eptp(vmx, vm, 0); + nested_map_memslot(vmx, vm, 0, 0); + nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0); + nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0); + + bmap = bitmap_alloc(TEST_MEM_SIZE); + host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM); + + while (!done) { + memset(host_test_mem, 0xaa, TEST_MEM_SIZE * 4096); + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + /* + * The nested guest wrote at offset 0x1000 in the memslot, but the + * dirty bitmap must be filled in according to L1 GPA, not L2. + */ + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + if (uc.args[1]) { + TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n"); + TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n"); + } else { + TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n"); + TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n"); + } + + TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n"); + TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n"); + TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n"); + TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n"); + break; + case UCALL_DONE: + done = true; + break; + default: + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); + } + } +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c index 853e370e8a39..9ef7fab39d48 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c @@ -224,7 +224,6 @@ int main(int argc, char *argv[]) { struct kvm_vm *vm; struct kvm_nested_state state; - struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS); @@ -237,10 +236,7 @@ int main(int argc, char *argv[]) * AMD currently does not implement set_nested_state, so for now we * just early out. */ - if (!(entry->ecx & CPUID_VMX)) { - fprintf(stderr, "nested VMX not enabled, skipping test\n"); - exit(KSFT_SKIP); - } + nested_vmx_check_supported(); vm = vm_create_default(VCPU_ID, 0, 0); @@ -271,12 +267,7 @@ int main(int argc, char *argv[]) state.flags = KVM_STATE_NESTED_RUN_PENDING; test_nested_state_expect_einval(vm, &state); - /* - * TODO: When SVM support is added for KVM_SET_NESTED_STATE - * add tests here to support it like VMX. - */ - if (entry->ecx & CPUID_VMX) - test_vmx_nested_state(vm); + test_vmx_nested_state(vm); kvm_vm_free(vm); return 0; diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c index f36c10eba71e..5590fd2bcf87 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c @@ -128,12 +128,8 @@ static void report(int64_t val) int main(int argc, char *argv[]) { vm_vaddr_t vmx_pages_gva; - struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); - if (!(entry->ecx & CPUID_VMX)) { - fprintf(stderr, "nested VMX not enabled, skipping test\n"); - exit(KSFT_SKIP); - } + nested_vmx_check_supported(); vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); diff --git a/tools/testing/selftests/membarrier/.gitignore b/tools/testing/selftests/membarrier/.gitignore index 020c44f49a9e..f2f7ec0a99b4 100644 --- a/tools/testing/selftests/membarrier/.gitignore +++ b/tools/testing/selftests/membarrier/.gitignore @@ -1 +1,2 @@ -membarrier_test +membarrier_test_multi_thread +membarrier_test_single_thread diff --git a/tools/testing/selftests/membarrier/Makefile b/tools/testing/selftests/membarrier/Makefile index 97e3bdf3d1e9..34d1c81a2324 100644 --- a/tools/testing/selftests/membarrier/Makefile +++ b/tools/testing/selftests/membarrier/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS += -g -I../../../../usr/include/ +LDLIBS += -lpthread -TEST_GEN_PROGS := membarrier_test +TEST_GEN_PROGS := membarrier_test_single_thread \ + membarrier_test_multi_thread include ../lib.mk - diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test_impl.h index 70b4ddbf126b..186be69f0a59 100644 --- a/tools/testing/selftests/membarrier/membarrier_test.c +++ b/tools/testing/selftests/membarrier/membarrier_test_impl.h @@ -1,10 +1,11 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #define _GNU_SOURCE #include <linux/membarrier.h> #include <syscall.h> #include <stdio.h> #include <errno.h> #include <string.h> +#include <pthread.h> #include "../kselftest.h" @@ -223,7 +224,7 @@ static int test_membarrier_global_expedited_success(void) return 0; } -static int test_membarrier(void) +static int test_membarrier_fail(void) { int status; @@ -233,10 +234,27 @@ static int test_membarrier(void) status = test_membarrier_flags_fail(); if (status) return status; - status = test_membarrier_global_success(); + status = test_membarrier_private_expedited_fail(); if (status) return status; - status = test_membarrier_private_expedited_fail(); + status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0); + if (status < 0) { + ksft_test_result_fail("sys_membarrier() failed\n"); + return status; + } + if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) { + status = test_membarrier_private_expedited_sync_core_fail(); + if (status) + return status; + } + return 0; +} + +static int test_membarrier_success(void) +{ + int status; + + status = test_membarrier_global_success(); if (status) return status; status = test_membarrier_register_private_expedited_success(); @@ -251,9 +269,6 @@ static int test_membarrier(void) return status; } if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) { - status = test_membarrier_private_expedited_sync_core_fail(); - if (status) - return status; status = test_membarrier_register_private_expedited_sync_core_success(); if (status) return status; @@ -300,14 +315,3 @@ static int test_membarrier_query(void) ksft_test_result_pass("sys_membarrier available\n"); return 0; } - -int main(int argc, char **argv) -{ - ksft_print_header(); - ksft_set_plan(13); - - test_membarrier_query(); - test_membarrier(); - - return ksft_exit_pass(); -} diff --git a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c new file mode 100644 index 000000000000..ac5613e5b0eb --- /dev/null +++ b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <linux/membarrier.h> +#include <syscall.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <pthread.h> + +#include "membarrier_test_impl.h" + +static int thread_ready, thread_quit; +static pthread_mutex_t test_membarrier_thread_mutex = + PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t test_membarrier_thread_cond = + PTHREAD_COND_INITIALIZER; + +void *test_membarrier_thread(void *arg) +{ + pthread_mutex_lock(&test_membarrier_thread_mutex); + thread_ready = 1; + pthread_cond_broadcast(&test_membarrier_thread_cond); + pthread_mutex_unlock(&test_membarrier_thread_mutex); + + pthread_mutex_lock(&test_membarrier_thread_mutex); + while (!thread_quit) + pthread_cond_wait(&test_membarrier_thread_cond, + &test_membarrier_thread_mutex); + pthread_mutex_unlock(&test_membarrier_thread_mutex); + + return NULL; +} + +static int test_mt_membarrier(void) +{ + int i; + pthread_t test_thread; + + pthread_create(&test_thread, NULL, + test_membarrier_thread, NULL); + + pthread_mutex_lock(&test_membarrier_thread_mutex); + while (!thread_ready) + pthread_cond_wait(&test_membarrier_thread_cond, + &test_membarrier_thread_mutex); + pthread_mutex_unlock(&test_membarrier_thread_mutex); + + test_membarrier_fail(); + + test_membarrier_success(); + + pthread_mutex_lock(&test_membarrier_thread_mutex); + thread_quit = 1; + pthread_cond_broadcast(&test_membarrier_thread_cond); + pthread_mutex_unlock(&test_membarrier_thread_mutex); + + pthread_join(test_thread, NULL); + + return 0; +} + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_set_plan(13); + + test_membarrier_query(); + + /* Multi-threaded */ + test_mt_membarrier(); + + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c new file mode 100644 index 000000000000..c1c963902854 --- /dev/null +++ b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <linux/membarrier.h> +#include <syscall.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <pthread.h> + +#include "membarrier_test_impl.h" + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_set_plan(13); + + test_membarrier_query(); + + test_membarrier_fail(); + + test_membarrier_success(); + + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index c7cced739c34..8aefd81fbc86 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -21,3 +21,4 @@ ipv6_flowlabel ipv6_flowlabel_mgr so_txtime tcp_fastopen_backup_key +nettest diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh index e6828732843e..9dc35a16e415 100755 --- a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh +++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh @@ -15,6 +15,8 @@ PAUSE_ON_FAIL=no VERBOSE=0 +which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) + ################################################################################ # helpers @@ -200,7 +202,7 @@ validate_v6_exception() local rc if [ ${ping_sz} != "0" ]; then - run_cmd ip netns exec h0 ping6 -s ${ping_sz} -c5 -w5 ${dst} + run_cmd ip netns exec h0 ${ping6} -s ${ping_sz} -c5 -w5 ${dst} fi if [ "$VERBOSE" = "1" ]; then @@ -243,7 +245,7 @@ do run_cmd taskset -c ${c} ip netns exec h0 ping -c1 -w1 172.16.10${i}.1 [ $? -ne 0 ] && printf "\nERROR: ping to h${i} failed\n" && ret=1 - run_cmd taskset -c ${c} ip netns exec h0 ping6 -c1 -w1 2001:db8:10${i}::1 + run_cmd taskset -c ${c} ip netns exec h0 ${ping6} -c1 -w1 2001:db8:10${i}::1 [ $? -ne 0 ] && printf "\nERROR: ping6 to h${i} failed\n" && ret=1 [ $ret -ne 0 ] && break diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index f9ebeac1e6f2..796670ebc65b 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -940,6 +940,20 @@ basic() run_cmd "$IP nexthop add id 104 group 1 dev veth1" log_test $? 2 "Nexthop group and device" + # Tests to ensure that flushing works as expected. + run_cmd "$IP nexthop add id 105 blackhole proto 99" + run_cmd "$IP nexthop add id 106 blackhole proto 100" + run_cmd "$IP nexthop add id 107 blackhole proto 99" + run_cmd "$IP nexthop flush proto 99" + check_nexthop "id 105" "" + check_nexthop "id 106" "id 106 blackhole proto 100" + check_nexthop "id 107" "" + run_cmd "$IP nexthop flush proto 100" + check_nexthop "id 106" "" + + run_cmd "$IP nexthop flush proto 100" + log_test $? 0 "Test proto flush" + run_cmd "$IP nexthop add id 104 group 1 blackhole" log_test $? 2 "Nexthop group and blackhole" diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 4465fc2dae14..76c1897e6352 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -9,7 +9,7 @@ ret=0 ksft_skip=4 # all tests in this script. Can be overridden with -t option -TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter" +TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter" VERBOSE=0 PAUSE_ON_FAIL=no @@ -17,6 +17,8 @@ PAUSE=no IP="ip -netns ns1" NS_EXEC="ip netns exec ns1" +which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) + log_test() { local rc=$1 @@ -614,6 +616,20 @@ fib_nexthop_test() cleanup } +fib_suppress_test() +{ + $IP link add dummy1 type dummy + $IP link set dummy1 up + $IP -6 route add default dev dummy1 + $IP -6 rule add table main suppress_prefixlength 0 + ping -f -c 1000 -W 1 1234::1 || true + $IP -6 rule del table main suppress_prefixlength 0 + $IP link del dummy1 + + # If we got here without crashing, we're good. + return 0 +} + ################################################################################ # Tests on route add and replace @@ -1086,7 +1102,7 @@ ipv6_route_metrics_test() log_test $rc 0 "Multipath route with mtu metric" $IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300 - run_cmd "ip netns exec ns1 ping6 -w1 -c1 -s 1500 2001:db8:104::1" + run_cmd "ip netns exec ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1" log_test $? 0 "Using route with mtu metric" run_cmd "$IP -6 ro add 2001:db8:114::/64 via 2001:db8:101::2 congctl lock foo" @@ -1422,6 +1438,27 @@ ipv4_addr_metric_test() fi log_test $rc 0 "Prefix route with metric on link up" + # explicitly check for metric changes on edge scenarios + run_cmd "$IP addr flush dev dummy2" + run_cmd "$IP addr add dev dummy2 172.16.104.0/24 metric 259" + run_cmd "$IP addr change dev dummy2 172.16.104.0/24 metric 260" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.0 metric 260" + rc=$? + fi + log_test $rc 0 "Modify metric of .0/24 address" + + run_cmd "$IP addr flush dev dummy2" + run_cmd "$IP addr add dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 260" + run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 261" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261" + rc=$? + fi + log_test $rc 0 "Modify metric of address with peer route" + $IP li del dummy1 $IP li del dummy2 cleanup @@ -1591,6 +1628,7 @@ do fib_carrier_test|carrier) fib_carrier_test;; fib_rp_filter_test|rp_filter) fib_rp_filter_test;; fib_nexthop_test|nexthop) fib_nexthop_test;; + fib_suppress_test|suppress) fib_suppress_test;; ipv6_route_test|ipv6_rt) ipv6_route_test;; ipv4_route_test|ipv4_rt) ipv4_route_test;; ipv6_addr_metric) ipv6_addr_metric_test;; diff --git a/tools/testing/selftests/net/l2tp.sh b/tools/testing/selftests/net/l2tp.sh index 5782433886fc..5782433886fc 100644..100755 --- a/tools/testing/selftests/net/l2tp.sh +++ b/tools/testing/selftests/net/l2tp.sh diff --git a/tools/testing/selftests/net/reuseport_dualstack.c b/tools/testing/selftests/net/reuseport_dualstack.c index fe3230c55986..fb7a59ed759e 100644 --- a/tools/testing/selftests/net/reuseport_dualstack.c +++ b/tools/testing/selftests/net/reuseport_dualstack.c @@ -129,7 +129,7 @@ static void test(int *rcv_fds, int count, int proto) { struct epoll_event ev; int epfd, i, test_fd; - uint16_t test_family; + int test_family; socklen_t len; epfd = epoll_create(1); @@ -146,6 +146,7 @@ static void test(int *rcv_fds, int count, int proto) send_from_v4(proto); test_fd = receive_once(epfd, proto); + len = sizeof(test_family); if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len)) error(1, errno, "failed to read socket domain"); if (test_family != AF_INET) diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index b8265ee9923f..614b31aad168 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -89,12 +89,9 @@ struct testcase testcases_v4[] = { .tfail = true, }, { - /* send a single MSS: will fail with GSO, because the segment - * logic in udp4_ufo_fragment demands a gso skb to be > MTU - */ + /* send a single MSS: will fall back to no GSO */ .tlen = CONST_MSS_V4, .gso_len = CONST_MSS_V4, - .tfail = true, .r_num_mss = 1, }, { @@ -139,10 +136,9 @@ struct testcase testcases_v4[] = { .tfail = true, }, { - /* send a single 1B MSS: will fail, see single MSS above */ + /* send a single 1B MSS: will fall back to no GSO */ .tlen = 1, .gso_len = 1, - .tfail = true, .r_num_mss = 1, }, { @@ -196,12 +192,9 @@ struct testcase testcases_v6[] = { .tfail = true, }, { - /* send a single MSS: will fail with GSO, because the segment - * logic in udp4_ufo_fragment demands a gso skb to be > MTU - */ + /* send a single MSS: will fall back to no GSO */ .tlen = CONST_MSS_V6, .gso_len = CONST_MSS_V6, - .tfail = true, .r_num_mss = 1, }, { @@ -246,10 +239,9 @@ struct testcase testcases_v6[] = { .tfail = true, }, { - /* send a single 1B MSS: will fail, see single MSS above */ + /* send a single 1B MSS: will fall back to no GSO */ .tlen = 1, .gso_len = 1, - .tfail = true, .r_num_mss = 1, }, { diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index 464c9b76148f..7550f08822a3 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g -I../../../../usr/include/ -lpthread +CFLAGS += -g -I../../../../usr/include/ -pthread TEST_GEN_PROGS := pidfd_test pidfd_open_test pidfd_poll_test pidfd_wait diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index f1fbc15800c4..ed1565809d2b 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -4,6 +4,7 @@ noarg: TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ large_vm_fork_separation +TEST_GEN_PROGS_EXTENDED := tlbie_test TEST_GEN_FILES := tempfile top_srcdir = ../../../../.. @@ -19,3 +20,4 @@ $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64 $(OUTPUT)/tempfile: dd if=/dev/zero of=$@ bs=64k count=1 +$(OUTPUT)/tlbie_test: LDLIBS += -lpthread diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c new file mode 100644 index 000000000000..f85a0938ab25 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c @@ -0,0 +1,734 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2019, Nick Piggin, Gautham R. Shenoy, Aneesh Kumar K.V, IBM Corp. + */ + +/* + * + * Test tlbie/mtpidr race. We have 4 threads doing flush/load/compare/store + * sequence in a loop. The same threads also rung a context switch task + * that does sched_yield() in loop. + * + * The snapshot thread mark the mmap area PROT_READ in between, make a copy + * and copy it back to the original area. This helps us to detect if any + * store continued to happen after we marked the memory PROT_READ. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <linux/futex.h> +#include <unistd.h> +#include <asm/unistd.h> +#include <string.h> +#include <stdlib.h> +#include <fcntl.h> +#include <sched.h> +#include <time.h> +#include <stdarg.h> +#include <sched.h> +#include <pthread.h> +#include <signal.h> +#include <sys/prctl.h> + +static inline void dcbf(volatile unsigned int *addr) +{ + __asm__ __volatile__ ("dcbf %y0; sync" : : "Z"(*(unsigned char *)addr) : "memory"); +} + +static void err_msg(char *msg) +{ + + time_t now; + time(&now); + printf("=================================\n"); + printf(" Error: %s\n", msg); + printf(" %s", ctime(&now)); + printf("=================================\n"); + exit(1); +} + +static char *map1; +static char *map2; +static pid_t rim_process_pid; + +/* + * A "rim-sequence" is defined to be the sequence of the following + * operations performed on a memory word: + * 1) FLUSH the contents of that word. + * 2) LOAD the contents of that word. + * 3) COMPARE the contents of that word with the content that was + * previously stored at that word + * 4) STORE new content into that word. + * + * The threads in this test that perform the rim-sequence are termed + * as rim_threads. + */ + +/* + * A "corruption" is defined to be the failed COMPARE operation in a + * rim-sequence. + * + * A rim_thread that detects a corruption informs about it to all the + * other rim_threads, and the mem_snapshot thread. + */ +static volatile unsigned int corruption_found; + +/* + * This defines the maximum number of rim_threads in this test. + * + * The THREAD_ID_BITS denote the number of bits required + * to represent the thread_ids [0..MAX_THREADS - 1]. + * We are being a bit paranoid here and set it to 8 bits, + * though 6 bits suffice. + * + */ +#define MAX_THREADS 64 +#define THREAD_ID_BITS 8 +#define THREAD_ID_MASK ((1 << THREAD_ID_BITS) - 1) +static unsigned int rim_thread_ids[MAX_THREADS]; +static pthread_t rim_threads[MAX_THREADS]; + + +/* + * Each rim_thread works on an exclusive "chunk" of size + * RIM_CHUNK_SIZE. + * + * The ith rim_thread works on the ith chunk. + * + * The ith chunk begins at + * map1 + (i * RIM_CHUNK_SIZE) + */ +#define RIM_CHUNK_SIZE 1024 +#define BITS_PER_BYTE 8 +#define WORD_SIZE (sizeof(unsigned int)) +#define WORD_BITS (WORD_SIZE * BITS_PER_BYTE) +#define WORDS_PER_CHUNK (RIM_CHUNK_SIZE/WORD_SIZE) + +static inline char *compute_chunk_start_addr(unsigned int thread_id) +{ + char *chunk_start; + + chunk_start = (char *)((unsigned long)map1 + + (thread_id * RIM_CHUNK_SIZE)); + + return chunk_start; +} + +/* + * The "word-offset" of a word-aligned address inside a chunk, is + * defined to be the number of words that precede the address in that + * chunk. + * + * WORD_OFFSET_BITS denote the number of bits required to represent + * the word-offsets of all the word-aligned addresses of a chunk. + */ +#define WORD_OFFSET_BITS (__builtin_ctz(WORDS_PER_CHUNK)) +#define WORD_OFFSET_MASK ((1 << WORD_OFFSET_BITS) - 1) + +static inline unsigned int compute_word_offset(char *start, unsigned int *addr) +{ + unsigned int delta_bytes, ret; + delta_bytes = (unsigned long)addr - (unsigned long)start; + + ret = delta_bytes/WORD_SIZE; + + return ret; +} + +/* + * A "sweep" is defined to be the sequential execution of the + * rim-sequence by a rim_thread on its chunk one word at a time, + * starting from the first word of its chunk and ending with the last + * word of its chunk. + * + * Each sweep of a rim_thread is uniquely identified by a sweep_id. + * SWEEP_ID_BITS denote the number of bits required to represent + * the sweep_ids of rim_threads. + * + * As to why SWEEP_ID_BITS are computed as a function of THREAD_ID_BITS, + * WORD_OFFSET_BITS, and WORD_BITS, see the "store-pattern" below. + */ +#define SWEEP_ID_BITS (WORD_BITS - (THREAD_ID_BITS + WORD_OFFSET_BITS)) +#define SWEEP_ID_MASK ((1 << SWEEP_ID_BITS) - 1) + +/* + * A "store-pattern" is the word-pattern that is stored into a word + * location in the 4)STORE step of the rim-sequence. + * + * In the store-pattern, we shall encode: + * + * - The thread-id of the rim_thread performing the store + * (The most significant THREAD_ID_BITS) + * + * - The word-offset of the address into which the store is being + * performed (The next WORD_OFFSET_BITS) + * + * - The sweep_id of the current sweep in which the store is + * being performed. (The lower SWEEP_ID_BITS) + * + * Store Pattern: 32 bits + * |------------------|--------------------|---------------------------------| + * | Thread id | Word offset | sweep_id | + * |------------------|--------------------|---------------------------------| + * THREAD_ID_BITS WORD_OFFSET_BITS SWEEP_ID_BITS + * + * In the store pattern, the (Thread-id + Word-offset) uniquely identify the + * address to which the store is being performed i.e, + * address == map1 + + * (Thread-id * RIM_CHUNK_SIZE) + (Word-offset * WORD_SIZE) + * + * And the sweep_id in the store pattern identifies the time when the + * store was performed by the rim_thread. + * + * We shall use this property in the 3)COMPARE step of the + * rim-sequence. + */ +#define SWEEP_ID_SHIFT 0 +#define WORD_OFFSET_SHIFT (SWEEP_ID_BITS) +#define THREAD_ID_SHIFT (WORD_OFFSET_BITS + SWEEP_ID_BITS) + +/* + * Compute the store pattern for a given thread with id @tid, at + * location @addr in the sweep identified by @sweep_id + */ +static inline unsigned int compute_store_pattern(unsigned int tid, + unsigned int *addr, + unsigned int sweep_id) +{ + unsigned int ret = 0; + char *start = compute_chunk_start_addr(tid); + unsigned int word_offset = compute_word_offset(start, addr); + + ret += (tid & THREAD_ID_MASK) << THREAD_ID_SHIFT; + ret += (word_offset & WORD_OFFSET_MASK) << WORD_OFFSET_SHIFT; + ret += (sweep_id & SWEEP_ID_MASK) << SWEEP_ID_SHIFT; + return ret; +} + +/* Extract the thread-id from the given store-pattern */ +static inline unsigned int extract_tid(unsigned int pattern) +{ + unsigned int ret; + + ret = (pattern >> THREAD_ID_SHIFT) & THREAD_ID_MASK; + return ret; +} + +/* Extract the word-offset from the given store-pattern */ +static inline unsigned int extract_word_offset(unsigned int pattern) +{ + unsigned int ret; + + ret = (pattern >> WORD_OFFSET_SHIFT) & WORD_OFFSET_MASK; + + return ret; +} + +/* Extract the sweep-id from the given store-pattern */ +static inline unsigned int extract_sweep_id(unsigned int pattern) + +{ + unsigned int ret; + + ret = (pattern >> SWEEP_ID_SHIFT) & SWEEP_ID_MASK; + + return ret; +} + +/************************************************************ + * * + * Logging the output of the verification * + * * + ************************************************************/ +#define LOGDIR_NAME_SIZE 100 +static char logdir[LOGDIR_NAME_SIZE]; + +static FILE *fp[MAX_THREADS]; +static const char logfilename[] ="Thread-%02d-Chunk"; + +static inline void start_verification_log(unsigned int tid, + unsigned int *addr, + unsigned int cur_sweep_id, + unsigned int prev_sweep_id) +{ + FILE *f; + char logfile[30]; + char path[LOGDIR_NAME_SIZE + 30]; + char separator[2] = "/"; + char *chunk_start = compute_chunk_start_addr(tid); + unsigned int size = RIM_CHUNK_SIZE; + + sprintf(logfile, logfilename, tid); + strcpy(path, logdir); + strcat(path, separator); + strcat(path, logfile); + f = fopen(path, "w"); + + if (!f) { + err_msg("Unable to create logfile\n"); + } + + fp[tid] = f; + + fprintf(f, "----------------------------------------------------------\n"); + fprintf(f, "PID = %d\n", rim_process_pid); + fprintf(f, "Thread id = %02d\n", tid); + fprintf(f, "Chunk Start Addr = 0x%016lx\n", (unsigned long)chunk_start); + fprintf(f, "Chunk Size = %d\n", size); + fprintf(f, "Next Store Addr = 0x%016lx\n", (unsigned long)addr); + fprintf(f, "Current sweep-id = 0x%08x\n", cur_sweep_id); + fprintf(f, "Previous sweep-id = 0x%08x\n", prev_sweep_id); + fprintf(f, "----------------------------------------------------------\n"); +} + +static inline void log_anamoly(unsigned int tid, unsigned int *addr, + unsigned int expected, unsigned int observed) +{ + FILE *f = fp[tid]; + + fprintf(f, "Thread %02d: Addr 0x%lx: Expected 0x%x, Observed 0x%x\n", + tid, (unsigned long)addr, expected, observed); + fprintf(f, "Thread %02d: Expected Thread id = %02d\n", tid, extract_tid(expected)); + fprintf(f, "Thread %02d: Observed Thread id = %02d\n", tid, extract_tid(observed)); + fprintf(f, "Thread %02d: Expected Word offset = %03d\n", tid, extract_word_offset(expected)); + fprintf(f, "Thread %02d: Observed Word offset = %03d\n", tid, extract_word_offset(observed)); + fprintf(f, "Thread %02d: Expected sweep-id = 0x%x\n", tid, extract_sweep_id(expected)); + fprintf(f, "Thread %02d: Observed sweep-id = 0x%x\n", tid, extract_sweep_id(observed)); + fprintf(f, "----------------------------------------------------------\n"); +} + +static inline void end_verification_log(unsigned int tid, unsigned nr_anamolies) +{ + FILE *f = fp[tid]; + char logfile[30]; + char path[LOGDIR_NAME_SIZE + 30]; + char separator[] = "/"; + + fclose(f); + + if (nr_anamolies == 0) { + remove(path); + return; + } + + sprintf(logfile, logfilename, tid); + strcpy(path, logdir); + strcat(path, separator); + strcat(path, logfile); + + printf("Thread %02d chunk has %d corrupted words. For details check %s\n", + tid, nr_anamolies, path); +} + +/* + * When a COMPARE step of a rim-sequence fails, the rim_thread informs + * everyone else via the shared_memory pointed to by + * corruption_found variable. On seeing this, every thread verifies the + * content of its chunk as follows. + * + * Suppose a thread identified with @tid was about to store (but not + * yet stored) to @next_store_addr in its current sweep identified + * @cur_sweep_id. Let @prev_sweep_id indicate the previous sweep_id. + * + * This implies that for all the addresses @addr < @next_store_addr, + * Thread @tid has already performed a store as part of its current + * sweep. Hence we expect the content of such @addr to be: + * |-------------------------------------------------| + * | tid | word_offset(addr) | cur_sweep_id | + * |-------------------------------------------------| + * + * Since Thread @tid is yet to perform stores on address + * @next_store_addr and above, we expect the content of such an + * address @addr to be: + * |-------------------------------------------------| + * | tid | word_offset(addr) | prev_sweep_id | + * |-------------------------------------------------| + * + * The verifier function @verify_chunk does this verification and logs + * any anamolies that it finds. + */ +static void verify_chunk(unsigned int tid, unsigned int *next_store_addr, + unsigned int cur_sweep_id, + unsigned int prev_sweep_id) +{ + unsigned int *iter_ptr; + unsigned int size = RIM_CHUNK_SIZE; + unsigned int expected; + unsigned int observed; + char *chunk_start = compute_chunk_start_addr(tid); + + int nr_anamolies = 0; + + start_verification_log(tid, next_store_addr, + cur_sweep_id, prev_sweep_id); + + for (iter_ptr = (unsigned int *)chunk_start; + (unsigned long)iter_ptr < (unsigned long)chunk_start + size; + iter_ptr++) { + unsigned int expected_sweep_id; + + if (iter_ptr < next_store_addr) { + expected_sweep_id = cur_sweep_id; + } else { + expected_sweep_id = prev_sweep_id; + } + + expected = compute_store_pattern(tid, iter_ptr, expected_sweep_id); + + dcbf((volatile unsigned int*)iter_ptr); //Flush before reading + observed = *iter_ptr; + + if (observed != expected) { + nr_anamolies++; + log_anamoly(tid, iter_ptr, expected, observed); + } + } + + end_verification_log(tid, nr_anamolies); +} + +static void set_pthread_cpu(pthread_t th, int cpu) +{ + cpu_set_t run_cpu_mask; + struct sched_param param; + + CPU_ZERO(&run_cpu_mask); + CPU_SET(cpu, &run_cpu_mask); + pthread_setaffinity_np(th, sizeof(cpu_set_t), &run_cpu_mask); + + param.sched_priority = 1; + if (0 && sched_setscheduler(0, SCHED_FIFO, ¶m) == -1) { + /* haven't reproduced with this setting, it kills random preemption which may be a factor */ + fprintf(stderr, "could not set SCHED_FIFO, run as root?\n"); + } +} + +static void set_mycpu(int cpu) +{ + cpu_set_t run_cpu_mask; + struct sched_param param; + + CPU_ZERO(&run_cpu_mask); + CPU_SET(cpu, &run_cpu_mask); + sched_setaffinity(0, sizeof(cpu_set_t), &run_cpu_mask); + + param.sched_priority = 1; + if (0 && sched_setscheduler(0, SCHED_FIFO, ¶m) == -1) { + fprintf(stderr, "could not set SCHED_FIFO, run as root?\n"); + } +} + +static volatile int segv_wait; + +static void segv_handler(int signo, siginfo_t *info, void *extra) +{ + while (segv_wait) { + sched_yield(); + } + +} + +static void set_segv_handler(void) +{ + struct sigaction sa; + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = segv_handler; + + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } +} + +int timeout = 0; +/* + * This function is executed by every rim_thread. + * + * This function performs sweeps over the exclusive chunks of the + * rim_threads executing the rim-sequence one word at a time. + */ +static void *rim_fn(void *arg) +{ + unsigned int tid = *((unsigned int *)arg); + + int size = RIM_CHUNK_SIZE; + char *chunk_start = compute_chunk_start_addr(tid); + + unsigned int prev_sweep_id; + unsigned int cur_sweep_id = 0; + + /* word access */ + unsigned int pattern = cur_sweep_id; + unsigned int *pattern_ptr = &pattern; + unsigned int *w_ptr, read_data; + + set_segv_handler(); + + /* + * Let us initialize the chunk: + * + * Each word-aligned address addr in the chunk, + * is initialized to : + * |-------------------------------------------------| + * | tid | word_offset(addr) | 0 | + * |-------------------------------------------------| + */ + for (w_ptr = (unsigned int *)chunk_start; + (unsigned long)w_ptr < (unsigned long)(chunk_start) + size; + w_ptr++) { + + *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id); + *w_ptr = *pattern_ptr; + } + + while (!corruption_found && !timeout) { + prev_sweep_id = cur_sweep_id; + cur_sweep_id = cur_sweep_id + 1; + + for (w_ptr = (unsigned int *)chunk_start; + (unsigned long)w_ptr < (unsigned long)(chunk_start) + size; + w_ptr++) { + unsigned int old_pattern; + + /* + * Compute the pattern that we would have + * stored at this location in the previous + * sweep. + */ + old_pattern = compute_store_pattern(tid, w_ptr, prev_sweep_id); + + /* + * FLUSH:Ensure that we flush the contents of + * the cache before loading + */ + dcbf((volatile unsigned int*)w_ptr); //Flush + + /* LOAD: Read the value */ + read_data = *w_ptr; //Load + + /* + * COMPARE: Is it the same as what we had stored + * in the previous sweep ? It better be! + */ + if (read_data != old_pattern) { + /* No it isn't! Tell everyone */ + corruption_found = 1; + } + + /* + * Before performing a store, let us check if + * any rim_thread has found a corruption. + */ + if (corruption_found || timeout) { + /* + * Yes. Someone (including us!) has found + * a corruption :( + * + * Let us verify that our chunk is + * correct. + */ + /* But first, let us allow the dust to settle down! */ + verify_chunk(tid, w_ptr, cur_sweep_id, prev_sweep_id); + + return 0; + } + + /* + * Compute the new pattern that we are going + * to write to this location + */ + *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id); + + /* + * STORE: Now let us write this pattern into + * the location + */ + *w_ptr = *pattern_ptr; + } + } + + return NULL; +} + + +static unsigned long start_cpu = 0; +static unsigned long nrthreads = 4; + +static pthread_t mem_snapshot_thread; + +static void *mem_snapshot_fn(void *arg) +{ + int page_size = getpagesize(); + size_t size = page_size; + void *tmp = malloc(size); + + while (!corruption_found && !timeout) { + /* Stop memory migration once corruption is found */ + segv_wait = 1; + + mprotect(map1, size, PROT_READ); + + /* + * Load from the working alias (map1). Loading from map2 + * also fails. + */ + memcpy(tmp, map1, size); + + /* + * Stores must go via map2 which has write permissions, but + * the corrupted data tends to be seen in the snapshot buffer, + * so corruption does not appear to be introduced at the + * copy-back via map2 alias here. + */ + memcpy(map2, tmp, size); + /* + * Before releasing other threads, must ensure the copy + * back to + */ + asm volatile("sync" ::: "memory"); + mprotect(map1, size, PROT_READ|PROT_WRITE); + asm volatile("sync" ::: "memory"); + segv_wait = 0; + + usleep(1); /* This value makes a big difference */ + } + + return 0; +} + +void alrm_sighandler(int sig) +{ + timeout = 1; +} + +int main(int argc, char *argv[]) +{ + int c; + int page_size = getpagesize(); + time_t now; + int i, dir_error; + pthread_attr_t attr; + key_t shm_key = (key_t) getpid(); + int shmid, run_time = 20 * 60; + struct sigaction sa_alrm; + + snprintf(logdir, LOGDIR_NAME_SIZE, + "/tmp/logdir-%u", (unsigned int)getpid()); + while ((c = getopt(argc, argv, "r:hn:l:t:")) != -1) { + switch(c) { + case 'r': + start_cpu = strtoul(optarg, NULL, 10); + break; + case 'h': + printf("%s [-r <start_cpu>] [-n <nrthreads>] [-l <logdir>] [-t <timeout>]\n", argv[0]); + exit(0); + break; + case 'n': + nrthreads = strtoul(optarg, NULL, 10); + break; + case 'l': + strncpy(logdir, optarg, LOGDIR_NAME_SIZE - 1); + break; + case 't': + run_time = strtoul(optarg, NULL, 10); + break; + default: + printf("invalid option\n"); + exit(0); + break; + } + } + + if (nrthreads > MAX_THREADS) + nrthreads = MAX_THREADS; + + shmid = shmget(shm_key, page_size, IPC_CREAT|0666); + if (shmid < 0) { + err_msg("Failed shmget\n"); + } + + map1 = shmat(shmid, NULL, 0); + if (map1 == (void *) -1) { + err_msg("Failed shmat"); + } + + map2 = shmat(shmid, NULL, 0); + if (map2 == (void *) -1) { + err_msg("Failed shmat"); + } + + dir_error = mkdir(logdir, 0755); + + if (dir_error) { + err_msg("Failed mkdir"); + } + + printf("start_cpu list:%lu\n", start_cpu); + printf("number of worker threads:%lu + 1 snapshot thread\n", nrthreads); + printf("Allocated address:0x%016lx + secondary map:0x%016lx\n", (unsigned long)map1, (unsigned long)map2); + printf("logdir at : %s\n", logdir); + printf("Timeout: %d seconds\n", run_time); + + time(&now); + printf("=================================\n"); + printf(" Starting Test\n"); + printf(" %s", ctime(&now)); + printf("=================================\n"); + + for (i = 0; i < nrthreads; i++) { + if (1 && !fork()) { + prctl(PR_SET_PDEATHSIG, SIGKILL); + set_mycpu(start_cpu + i); + for (;;) + sched_yield(); + exit(0); + } + } + + + sa_alrm.sa_handler = &alrm_sighandler; + sigemptyset(&sa_alrm.sa_mask); + sa_alrm.sa_flags = 0; + + if (sigaction(SIGALRM, &sa_alrm, 0) == -1) { + err_msg("Failed signal handler registration\n"); + } + + alarm(run_time); + + pthread_attr_init(&attr); + for (i = 0; i < nrthreads; i++) { + rim_thread_ids[i] = i; + pthread_create(&rim_threads[i], &attr, rim_fn, &rim_thread_ids[i]); + set_pthread_cpu(rim_threads[i], start_cpu + i); + } + + pthread_create(&mem_snapshot_thread, &attr, mem_snapshot_fn, map1); + set_pthread_cpu(mem_snapshot_thread, start_cpu + i); + + + pthread_join(mem_snapshot_thread, NULL); + for (i = 0; i < nrthreads; i++) { + pthread_join(rim_threads[i], NULL); + } + + if (!timeout) { + time(&now); + printf("=================================\n"); + printf(" Data Corruption Detected\n"); + printf(" %s", ctime(&now)); + printf(" See logfiles in %s\n", logdir); + printf("=================================\n"); + return 1; + } + return 0; +} diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 951fe855f7cd..98f2708d86cc 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -17,3 +17,4 @@ tm-vmx-unavail tm-unavailable tm-trap tm-sigreturn +tm-poison diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index c0734ed0ef56..b15a1a325bd0 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -5,7 +5,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \ $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt \ - tm-signal-context-force-tm + tm-signal-context-force-tm tm-poison top_srcdir = ../../../../.. include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c new file mode 100644 index 000000000000..977558497c16 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-poison.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2019, Gustavo Romero, Michael Neuling, IBM Corp. + * + * This test will spawn two processes. Both will be attached to the same + * CPU (CPU 0). The child will be in a loop writing to FP register f31 and + * VMX/VEC/Altivec register vr31 a known value, called poison, calling + * sched_yield syscall after to allow the parent to switch on the CPU. + * Parent will set f31 and vr31 to 1 and in a loop will check if f31 and + * vr31 remain 1 as expected until a given timeout (2m). If the issue is + * present child's poison will leak into parent's f31 or vr31 registers, + * otherwise, poison will never leak into parent's f31 and vr31 registers. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <inttypes.h> +#include <sched.h> +#include <sys/types.h> +#include <signal.h> +#include <inttypes.h> + +#include "tm.h" + +int tm_poison_test(void) +{ + int pid; + cpu_set_t cpuset; + uint64_t poison = 0xdeadbeefc0dec0fe; + uint64_t unknown = 0; + bool fail_fp = false; + bool fail_vr = false; + + SKIP_IF(!have_htm()); + + /* Attach both Child and Parent to CPU 0 */ + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + sched_setaffinity(0, sizeof(cpuset), &cpuset); + + pid = fork(); + if (!pid) { + /** + * child + */ + while (1) { + sched_yield(); + asm ( + "mtvsrd 31, %[poison];" // f31 = poison + "mtvsrd 63, %[poison];" // vr31 = poison + + : : [poison] "r" (poison) : ); + } + } + + /** + * parent + */ + asm ( + /* + * Set r3, r4, and f31 to known value 1 before entering + * in transaction. They won't be written after that. + */ + " li 3, 0x1 ;" + " li 4, 0x1 ;" + " mtvsrd 31, 4 ;" + + /* + * The Time Base (TB) is a 64-bit counter register that is + * independent of the CPU clock and which is incremented + * at a frequency of 512000000 Hz, so every 1.953125ns. + * So it's necessary 120s/0.000000001953125s = 61440000000 + * increments to get a 2 minutes timeout. Below we set that + * value in r5 and then use r6 to track initial TB value, + * updating TB values in r7 at every iteration and comparing it + * to r6. When r7 (current) - r6 (initial) > 61440000000 we bail + * out since for sure we spent already 2 minutes in the loop. + * SPR 268 is the TB register. + */ + " lis 5, 14 ;" + " ori 5, 5, 19996 ;" + " sldi 5, 5, 16 ;" // r5 = 61440000000 + + " mfspr 6, 268 ;" // r6 (TB initial) + "1: mfspr 7, 268 ;" // r7 (TB current) + " subf 7, 6, 7 ;" // r7 - r6 > 61440000000 ? + " cmpd 7, 5 ;" + " bgt 3f ;" // yes, exit + + /* + * Main loop to check f31 + */ + " tbegin. ;" // no, try again + " beq 1b ;" // restart if no timeout + " mfvsrd 3, 31 ;" // read f31 + " cmpd 3, 4 ;" // f31 == 1 ? + " bne 2f ;" // broken :-( + " tabort. 3 ;" // try another transaction + "2: tend. ;" // commit transaction + "3: mr %[unknown], 3 ;" // record r3 + + : [unknown] "=r" (unknown) + : + : "cr0", "r3", "r4", "r5", "r6", "r7", "vs31" + + ); + + /* + * On leak 'unknown' will contain 'poison' value from child, + * otherwise (no leak) 'unknown' will contain the same value + * as r3 before entering in transactional mode, i.e. 0x1. + */ + fail_fp = unknown != 0x1; + if (fail_fp) + printf("Unknown value %#"PRIx64" leaked into f31!\n", unknown); + else + printf("Good, no poison or leaked value into FP registers\n"); + + asm ( + /* + * Set r3, r4, and vr31 to known value 1 before entering + * in transaction. They won't be written after that. + */ + " li 3, 0x1 ;" + " li 4, 0x1 ;" + " mtvsrd 63, 4 ;" + + " lis 5, 14 ;" + " ori 5, 5, 19996 ;" + " sldi 5, 5, 16 ;" // r5 = 61440000000 + + " mfspr 6, 268 ;" // r6 (TB initial) + "1: mfspr 7, 268 ;" // r7 (TB current) + " subf 7, 6, 7 ;" // r7 - r6 > 61440000000 ? + " cmpd 7, 5 ;" + " bgt 3f ;" // yes, exit + + /* + * Main loop to check vr31 + */ + " tbegin. ;" // no, try again + " beq 1b ;" // restart if no timeout + " mfvsrd 3, 63 ;" // read vr31 + " cmpd 3, 4 ;" // vr31 == 1 ? + " bne 2f ;" // broken :-( + " tabort. 3 ;" // try another transaction + "2: tend. ;" // commit transaction + "3: mr %[unknown], 3 ;" // record r3 + + : [unknown] "=r" (unknown) + : + : "cr0", "r3", "r4", "r5", "r6", "r7", "vs63" + + ); + + /* + * On leak 'unknown' will contain 'poison' value from child, + * otherwise (no leak) 'unknown' will contain the same value + * as r3 before entering in transactional mode, i.e. 0x1. + */ + fail_vr = unknown != 0x1; + if (fail_vr) + printf("Unknown value %#"PRIx64" leaked into vr31!\n", unknown); + else + printf("Good, no poison or leaked value into VEC registers\n"); + + kill(pid, SIGKILL); + + return (fail_fp | fail_vr); +} + +int main(int argc, char *argv[]) +{ + /* Test completes in about 4m */ + test_harness_set_timeout(250); + return test_harness(tm_poison_test, "tm_poison_test"); +} diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings new file mode 100644 index 000000000000..ba4d85f74cd6 --- /dev/null +++ b/tools/testing/selftests/rtc/settings @@ -0,0 +1 @@ +timeout=90 diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c index c0534e298b51..cb3fc09645c4 100644 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -37,7 +37,7 @@ int main(int argc, char **argv) char *file = "/dev/zero"; char *p; - while ((opt = getopt(argc, argv, "m:r:n:f:tTLUSH")) != -1) { + while ((opt = getopt(argc, argv, "m:r:n:f:tTLUwSH")) != -1) { switch (opt) { case 'm': size = atoi(optarg) * MB; diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index afff120c7be6..f45e510500c0 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -19,7 +19,7 @@ int fd; const char v = 'V'; -static const char sopts[] = "bdehp:t:Tn:NLf:"; +static const char sopts[] = "bdehp:t:Tn:NLf:i"; static const struct option lopts[] = { {"bootstatus", no_argument, NULL, 'b'}, {"disable", no_argument, NULL, 'd'}, @@ -32,6 +32,7 @@ static const struct option lopts[] = { {"getpretimeout", no_argument, NULL, 'N'}, {"gettimeleft", no_argument, NULL, 'L'}, {"file", required_argument, NULL, 'f'}, + {"info", no_argument, NULL, 'i'}, {NULL, no_argument, NULL, 0x0} }; @@ -72,6 +73,7 @@ static void usage(char *progname) printf("Usage: %s [options]\n", progname); printf(" -f, --file\t\tOpen watchdog device file\n"); printf("\t\t\tDefault is /dev/watchdog\n"); + printf(" -i, --info\t\tShow watchdog_info\n"); printf(" -b, --bootstatus\tGet last boot status (Watchdog/POR)\n"); printf(" -d, --disable\t\tTurn off the watchdog timer\n"); printf(" -e, --enable\t\tTurn on the watchdog timer\n"); @@ -97,6 +99,7 @@ int main(int argc, char *argv[]) int c; int oneshot = 0; char *file = "/dev/watchdog"; + struct watchdog_info info; setbuf(stdout, NULL); @@ -118,6 +121,16 @@ int main(int argc, char *argv[]) exit(-1); } + /* + * Validate that `file` is a watchdog device + */ + ret = ioctl(fd, WDIOC_GETSUPPORT, &info); + if (ret) { + printf("WDIOC_GETSUPPORT error '%s'\n", strerror(errno)); + close(fd); + exit(ret); + } + optind = 0; while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) { @@ -205,6 +218,18 @@ int main(int argc, char *argv[]) case 'f': /* Handled above */ break; + case 'i': + /* + * watchdog_info was obtained as part of file open + * validation. So we just show it here. + */ + oneshot = 1; + printf("watchdog_info:\n"); + printf(" identity:\t\t%s\n", info.identity); + printf(" firmware_version:\t%u\n", + info.firmware_version); + printf(" options:\t\t%08x\n", info.options); + break; default: usage(argv[0]); diff --git a/tools/usb/usbip/libsrc/usbip_device_driver.c b/tools/usb/usbip/libsrc/usbip_device_driver.c index 051d7d3f443b..927a151fa9aa 100644 --- a/tools/usb/usbip/libsrc/usbip_device_driver.c +++ b/tools/usb/usbip/libsrc/usbip_device_driver.c @@ -69,7 +69,7 @@ int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev) FILE *fd = NULL; struct udev_device *plat; const char *speed; - int ret = 0; + size_t ret; plat = udev_device_get_parent(sdev); path = udev_device_get_syspath(plat); @@ -79,8 +79,10 @@ int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev) if (!fd) return -1; ret = fread((char *) &descr, sizeof(descr), 1, fd); - if (ret < 0) + if (ret != 1) { + err("Cannot read vudc device descr file: %s", strerror(errno)); goto err; + } fclose(fd); copy_descr_attr(dev, &descr, bDeviceClass); diff --git a/tools/virtio/crypto/hash.h b/tools/virtio/crypto/hash.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/virtio/crypto/hash.h diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h index f91aeb5fe571..8f41cd6bd5c0 100644 --- a/tools/virtio/linux/dma-mapping.h +++ b/tools/virtio/linux/dma-mapping.h @@ -29,4 +29,6 @@ enum dma_data_direction { #define dma_unmap_single(...) do { } while (0) #define dma_unmap_page(...) do { } while (0) +#define dma_max_mapping_size(...) SIZE_MAX + #endif diff --git a/tools/virtio/xen/xen.h b/tools/virtio/xen/xen.h new file mode 100644 index 000000000000..f569387d1403 --- /dev/null +++ b/tools/virtio/xen/xen.h @@ -0,0 +1,6 @@ +#ifndef XEN_XEN_STUB_H +#define XEN_XEN_STUB_H + +#define xen_domain() 0 + +#endif |