summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/arm/include/uapi/asm/kvm.h9
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h2
-rw-r--r--tools/arch/x86/include/asm/required-features.h8
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h19
-rw-r--r--tools/bpf/Makefile2
-rw-r--r--tools/bpf/bpf_dbg.c7
-rw-r--r--tools/build/Build.include5
-rw-r--r--tools/gpio/gpio-event-mon.c2
-rw-r--r--tools/include/linux/compiler.h20
-rw-r--r--tools/include/linux/coresight-pmu.h13
-rw-r--r--tools/include/linux/spinlock.h1
-rw-r--r--tools/include/tools/config.h34
-rw-r--r--tools/include/uapi/asm-generic/mman-common.h3
-rw-r--r--tools/include/uapi/drm/i915_drm.h112
-rw-r--r--tools/include/uapi/linux/bpf.h1
-rw-r--r--tools/include/uapi/linux/if_link.h39
-rw-r--r--tools/include/uapi/linux/kvm.h23
-rw-r--r--tools/include/uapi/linux/perf_event.h18
-rw-r--r--tools/include/uapi/sound/asound.h1
-rwxr-xr-xtools/kvm/kvm_stat/kvm_stat11
-rw-r--r--tools/lib/subcmd/parse-options.c6
-rw-r--r--tools/objtool/Makefile6
-rw-r--r--tools/perf/Documentation/perf-config.txt5
-rw-r--r--tools/perf/Documentation/perf-mem.txt42
-rw-r--r--tools/perf/Documentation/perf-report.txt1
-rw-r--r--tools/perf/Documentation/perf-sched.txt4
-rw-r--r--tools/perf/Documentation/perf-script.txt17
-rw-r--r--tools/perf/Documentation/perf-stat.txt2
-rw-r--r--tools/perf/Documentation/perf-trace.txt3
-rw-r--r--tools/perf/Documentation/perf-version.txt24
-rw-r--r--tools/perf/Makefile.config12
-rw-r--r--tools/perf/Makefile.perf3
-rw-r--r--tools/perf/arch/arm/include/arch-tests.h12
-rw-r--r--tools/perf/arch/arm/tests/Build2
-rw-r--r--tools/perf/arch/arm/tests/arch-tests.c16
-rw-r--r--tools/perf/arch/arm/util/auxtrace.c13
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c13
-rw-r--r--tools/perf/arch/arm/util/cs-etm.h13
-rw-r--r--tools/perf/arch/arm/util/pmu.c13
-rw-r--r--tools/perf/arch/s390/util/auxtrace.c1
-rw-r--r--tools/perf/arch/s390/util/header.c18
-rw-r--r--tools/perf/arch/x86/Makefile2
-rw-r--r--tools/perf/arch/x86/annotate/instructions.c67
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl712
-rw-r--r--tools/perf/builtin-help.c2
-rw-r--r--tools/perf/builtin-mem.c4
-rw-r--r--tools/perf/builtin-script.c9
-rw-r--r--tools/perf/builtin-stat.c54
-rw-r--r--tools/perf/builtin-trace.c11
-rw-r--r--tools/perf/builtin-version.c85
-rw-r--r--tools/perf/perf.c10
-rw-r--r--tools/perf/perf.h1
-rw-r--r--tools/perf/pmu-events/arch/s390/mapfile.csv10
-rw-r--r--tools/perf/tests/attr/test-record-group-sampling3
-rw-r--r--tools/perf/tests/bpf-script-example.c2
-rw-r--r--tools/perf/tests/bpf-script-test-kbuild.c1
-rw-r--r--tools/perf/tests/builtin-test.c1
-rw-r--r--tools/perf/tests/mmap-basic.c2
-rwxr-xr-xtools/perf/tests/shell/record+probe_libc_inet_pton.sh6
-rw-r--r--tools/perf/trace/beauty/mmap.c3
-rw-r--r--tools/perf/ui/browser.c12
-rw-r--r--tools/perf/ui/browser.h2
-rw-r--r--tools/perf/ui/browsers/annotate.c38
-rw-r--r--tools/perf/ui/browsers/hists.c140
-rw-r--r--tools/perf/util/annotate.c77
-rw-r--r--tools/perf/util/annotate.h21
-rw-r--r--tools/perf/util/auxtrace.c72
-rw-r--r--tools/perf/util/c++/clang-test.cpp2
-rw-r--r--tools/perf/util/c++/clang.cpp11
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c3
-rw-r--r--tools/perf/util/cs-etm.c3
-rw-r--r--tools/perf/util/cs-etm.h13
-rw-r--r--tools/perf/util/dwarf-aux.c2
-rw-r--r--tools/perf/util/event.c4
-rw-r--r--tools/perf/util/evsel.c24
-rw-r--r--tools/perf/util/evsel.h1
-rwxr-xr-xtools/perf/util/generate-cmdlist.sh2
-rw-r--r--tools/perf/util/header.c3
-rw-r--r--tools/perf/util/hist.c81
-rw-r--r--tools/perf/util/hist.h7
-rw-r--r--tools/perf/util/machine.c30
-rw-r--r--tools/perf/util/map.h4
-rw-r--r--tools/perf/util/parse-events.y8
-rw-r--r--tools/perf/util/pmu.c28
-rw-r--r--tools/perf/util/session.c2
-rw-r--r--tools/perf/util/sort.c41
-rw-r--r--tools/perf/util/sort.h1
-rw-r--r--tools/perf/util/symbol.c8
-rw-r--r--tools/perf/util/syscalltbl.c6
-rw-r--r--tools/perf/util/trace-event-scripting.c4
-rw-r--r--tools/perf/util/util.h4
-rw-r--r--tools/scripts/Makefile.include2
-rwxr-xr-xtools/testing/ktest/config-bisect.pl770
-rwxr-xr-xtools/testing/ktest/ktest.pl533
-rw-r--r--tools/testing/ktest/sample.conf60
-rw-r--r--tools/testing/nvdimm/test/nfit.c323
-rw-r--r--tools/testing/nvdimm/test/nfit_test.h16
-rw-r--r--tools/testing/radix-tree/linux/gfp.h1
-rw-r--r--tools/testing/selftests/Makefile13
-rw-r--r--tools/testing/selftests/android/ion/.gitignore1
-rw-r--r--tools/testing/selftests/android/ion/Makefile5
-rw-r--r--tools/testing/selftests/android/ion/config1
-rw-r--r--tools/testing/selftests/android/ion/ionmap_test.c136
-rw-r--r--tools/testing/selftests/android/ion/ionutils.c6
-rw-r--r--tools/testing/selftests/bpf/.gitignore3
-rw-r--r--tools/testing/selftests/bpf/test_progs.c4
-rw-r--r--tools/testing/selftests/bpf/test_sock.c1
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c1
-rwxr-xr-xtools/testing/selftests/bpf/test_sock_addr.sh4
-rw-r--r--tools/testing/selftests/filesystems/Makefile8
-rw-r--r--tools/testing/selftests/firmware/Makefile1
-rwxr-xr-xtools/testing/selftests/firmware/fw_lib.sh10
-rwxr-xr-xtools/testing/selftests/firmware/fw_run_tests.sh2
-rw-r--r--tools/testing/selftests/ftrace/test.d/functions7
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc39
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc54
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc58
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc44
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc50
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc50
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc48
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc54
-rw-r--r--tools/testing/selftests/futex/Makefile4
-rw-r--r--tools/testing/selftests/intel_pstate/Makefile5
-rw-r--r--tools/testing/selftests/kselftest.h3
-rw-r--r--tools/testing/selftests/kselftest_harness.h26
-rw-r--r--tools/testing/selftests/kvm/Makefile40
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h145
-rw-r--r--tools/testing/selftests/kvm/include/sparsebit.h75
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h45
-rw-r--r--tools/testing/selftests/kvm/include/vmx.h494
-rw-r--r--tools/testing/selftests/kvm/include/x86.h1043
-rw-r--r--tools/testing/selftests/kvm/lib/assert.c87
-rw-r--r--tools/testing/selftests/kvm/lib/elf.c197
-rw-r--r--tools/testing/selftests/kvm/lib/io.c158
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c1486
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h67
-rw-r--r--tools/testing/selftests/kvm/lib/sparsebit.c2087
-rw-r--r--tools/testing/selftests/kvm/lib/vmx.c243
-rw-r--r--tools/testing/selftests/kvm/lib/x86.c700
-rw-r--r--tools/testing/selftests/kvm/set_sregs_test.c54
-rw-r--r--tools/testing/selftests/kvm/sync_regs_test.c232
-rw-r--r--tools/testing/selftests/kvm/vmx_tsc_adjust_test.c231
-rw-r--r--tools/testing/selftests/lib.mk7
-rw-r--r--tools/testing/selftests/net/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/Makefile7
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/exec_target.c13
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/fork.c325
-rw-r--r--tools/testing/selftests/powerpc/copyloops/Makefile4
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-sigreturn.c92
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-unavailable.c24
-rw-r--r--tools/testing/selftests/proc/.gitignore8
-rw-r--r--tools/testing/selftests/proc/Makefile13
-rw-r--r--tools/testing/selftests/proc/config1
-rw-r--r--tools/testing/selftests/proc/proc-loadavg-001.c83
-rw-r--r--tools/testing/selftests/proc/proc-self-map-files-001.c82
-rw-r--r--tools/testing/selftests/proc/proc-self-map-files-002.c85
-rw-r--r--tools/testing/selftests/proc/proc-self-syscall.c60
-rw-r--r--tools/testing/selftests/proc/proc-self-wchan.c40
-rw-r--r--tools/testing/selftests/proc/proc-uptime-001.c45
-rw-r--r--tools/testing/selftests/proc/proc-uptime-002.c79
-rw-r--r--tools/testing/selftests/proc/proc-uptime.h74
-rw-r--r--tools/testing/selftests/proc/read.c147
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c15
-rw-r--r--tools/testing/selftests/x86/test_syscall_vdso.c35
-rw-r--r--tools/thermal/tmon/sysfs.c12
-rw-r--r--tools/thermal/tmon/tmon.c1
-rw-r--r--tools/virtio/ringtest/ptr_ring.c5
170 files changed, 11975 insertions, 1209 deletions
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
index 6edd177bb1c7..2ba95d6fe852 100644
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -135,6 +135,15 @@ struct kvm_arch_memory_slot {
#define KVM_REG_ARM_CRM_SHIFT 7
#define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800
#define KVM_REG_ARM_32_CRN_SHIFT 11
+/*
+ * For KVM currently all guest registers are nonsecure, but we reserve a bit
+ * in the encoding to distinguish secure from nonsecure for AArch32 system
+ * registers that are banked by security. This is 1 for the secure banked
+ * register, and 0 for the nonsecure banked register or if the register is
+ * not banked by security.
+ */
+#define KVM_REG_ARM_SECURE_MASK 0x0000000010000000
+#define KVM_REG_ARM_SECURE_SHIFT 28
#define ARM_CP15_REG_SHIFT_MASK(x,n) \
(((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index f41079da38c5..d554c11e01ff 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -316,6 +316,7 @@
#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
@@ -328,6 +329,7 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index fb3a6de7440b..6847d85400a8 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -53,12 +53,6 @@
# define NEED_MOVBE 0
#endif
-#ifdef CONFIG_X86_5LEVEL
-# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31))
-#else
-# define NEED_LA57 0
-#endif
-
#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT
/* Paravirtualized systems may not have PSE or PGE available */
@@ -104,7 +98,7 @@
#define REQUIRED_MASK13 0
#define REQUIRED_MASK14 0
#define REQUIRED_MASK15 0
-#define REQUIRED_MASK16 (NEED_LA57)
+#define REQUIRED_MASK16 0
#define REQUIRED_MASK17 0
#define REQUIRED_MASK18 0
#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index f3a960488eae..c535c2fdea13 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -354,8 +354,25 @@ struct kvm_xcrs {
__u64 padding[16];
};
-/* definition of registers in kvm_run */
+#define KVM_SYNC_X86_REGS (1UL << 0)
+#define KVM_SYNC_X86_SREGS (1UL << 1)
+#define KVM_SYNC_X86_EVENTS (1UL << 2)
+
+#define KVM_SYNC_X86_VALID_FIELDS \
+ (KVM_SYNC_X86_REGS| \
+ KVM_SYNC_X86_SREGS| \
+ KVM_SYNC_X86_EVENTS)
+
+/* kvm_sync_regs struct included by kvm_run struct */
struct kvm_sync_regs {
+ /* Members of this structure are potentially malicious.
+ * Care must be taken by code reading, esp. interpreting,
+ * data fields from them inside KVM to prevent TOCTOU and
+ * double-fetch types of vulnerabilities.
+ */
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+ struct kvm_vcpu_events events;
};
#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 1ea545965ee3..53b60ad452f5 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -76,6 +76,8 @@ $(OUTPUT)bpf_asm: $(OUTPUT)bpf_asm.o $(OUTPUT)bpf_exp.yacc.o $(OUTPUT)bpf_exp.le
$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^
$(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
+$(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c
+$(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c
clean: bpftool_clean
$(call QUIET_CLEAN, bpf-progs)
diff --git a/tools/bpf/bpf_dbg.c b/tools/bpf/bpf_dbg.c
index 4f254bcc4423..61b9aa5d6415 100644
--- a/tools/bpf/bpf_dbg.c
+++ b/tools/bpf/bpf_dbg.c
@@ -1063,7 +1063,7 @@ static int cmd_load_pcap(char *file)
static int cmd_load(char *arg)
{
- char *subcmd, *cont, *tmp = strdup(arg);
+ char *subcmd, *cont = NULL, *tmp = strdup(arg);
int ret = CMD_OK;
subcmd = strtok_r(tmp, " ", &cont);
@@ -1073,7 +1073,10 @@ static int cmd_load(char *arg)
bpf_reset();
bpf_reset_breakpoints();
- ret = cmd_load_bpf(cont);
+ if (!cont)
+ ret = CMD_ERR;
+ else
+ ret = cmd_load_bpf(cont);
} else if (matches(subcmd, "pcap") == 0) {
ret = cmd_load_pcap(cont);
} else {
diff --git a/tools/build/Build.include b/tools/build/Build.include
index 418871d02ebf..a4bbb984941d 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -12,6 +12,7 @@
# Convenient variables
comma := ,
squote := '
+pound := \#
###
# Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o
@@ -43,11 +44,11 @@ echo-cmd = $(if $($(quiet)cmd_$(1)),\
###
# Replace >$< with >$$< to preserve $ when reloading the .cmd file
# (needed for make)
-# Replace >#< with >\#< to avoid starting a comment in the .cmd file
+# Replace >#< with >$(pound)< to avoid starting a comment in the .cmd file
# (needed for make)
# Replace >'< with >'\''< to be able to enclose the whole string in '...'
# (needed for the shell)
-make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1)))))
+make-cmd = $(call escsq,$(subst $(pound),$$(pound),$(subst $$,$$$$,$(cmd_$(1)))))
###
# Find any prerequisites that is newer than target or that does not exist.
diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c
index dac4d4131d9b..c864544efe05 100644
--- a/tools/gpio/gpio-event-mon.c
+++ b/tools/gpio/gpio-event-mon.c
@@ -99,7 +99,7 @@ int monitor_device(const char *device_name,
ret = -EIO;
break;
}
- fprintf(stdout, "GPIO EVENT %" PRIu64 ": ", event.timestamp);
+ fprintf(stdout, "GPIO EVENT %llu: ", event.timestamp);
switch (event.id) {
case GPIOEVENT_EVENT_RISING_EDGE:
fprintf(stdout, "rising edge");
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 04e32f965ad7..1827c2f973f9 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -151,11 +151,21 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
* required ordering.
*/
-#define READ_ONCE(x) \
- ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
-
-#define WRITE_ONCE(x, val) \
- ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
+#define READ_ONCE(x) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u = \
+ { .__c = { 0 } }; \
+ __read_once_size(&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
+})
+
+#define WRITE_ONCE(x, val) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u = \
+ { .__val = (val) }; \
+ __write_once_size(&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
+})
#ifndef __fallthrough
diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h
index edfeaba95429..a1a959ba24ff 100644
--- a/tools/include/linux/coresight-pmu.h
+++ b/tools/include/linux/coresight-pmu.h
@@ -1,18 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright(C) 2015 Linaro Limited. All rights reserved.
* Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _LINUX_CORESIGHT_PMU_H
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
index 4ed569fcb139..b21b586b9854 100644
--- a/tools/include/linux/spinlock.h
+++ b/tools/include/linux/spinlock.h
@@ -7,6 +7,7 @@
#define spinlock_t pthread_mutex_t
#define DEFINE_SPINLOCK(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER;
+#define __SPIN_LOCK_UNLOCKED(x) (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER
#define spin_lock_irqsave(x, f) (void)f, pthread_mutex_lock(x)
#define spin_unlock_irqrestore(x, f) (void)f, pthread_mutex_unlock(x)
diff --git a/tools/include/tools/config.h b/tools/include/tools/config.h
new file mode 100644
index 000000000000..08ade7df8132
--- /dev/null
+++ b/tools/include/tools/config.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_CONFIG_H
+#define _TOOLS_CONFIG_H
+
+/* Subset of include/linux/kconfig.h */
+
+#define __ARG_PLACEHOLDER_1 0,
+#define __take_second_arg(__ignored, val, ...) val
+
+/*
+ * Helper macros to use CONFIG_ options in C/CPP expressions. Note that
+ * these only work with boolean and tristate options.
+ */
+
+/*
+ * Getting something that works in C and CPP for an arg that may or may
+ * not be defined is tricky. Here, if we have "#define CONFIG_BOOGER 1"
+ * we match on the placeholder define, insert the "0," for arg1 and generate
+ * the triplet (0, 1, 0). Then the last step cherry picks the 2nd arg (a one).
+ * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when
+ * the last step cherry picks the 2nd arg, we get a zero.
+ */
+#define __is_defined(x) ___is_defined(x)
+#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
+#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
+
+/*
+ * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0
+ * otherwise. For boolean options, this is equivalent to
+ * IS_ENABLED(CONFIG_FOO).
+ */
+#define IS_BUILTIN(option) __is_defined(option)
+
+#endif /* _TOOLS_CONFIG_H */
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h
index f8b134f5608f..e7ee32861d51 100644
--- a/tools/include/uapi/asm-generic/mman-common.h
+++ b/tools/include/uapi/asm-generic/mman-common.h
@@ -27,6 +27,9 @@
# define MAP_UNINITIALIZED 0x0 /* Don't support this flag */
#endif
+/* 0x0100 - 0x80000 flags are defined in asm-generic/mman.h */
+#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */
+
/*
* Flags for mlock
*/
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 536ee4febd74..7f5634ce8e88 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -318,6 +318,7 @@ typedef struct _drm_i915_sarea {
#define DRM_I915_PERF_OPEN 0x36
#define DRM_I915_PERF_ADD_CONFIG 0x37
#define DRM_I915_PERF_REMOVE_CONFIG 0x38
+#define DRM_I915_QUERY 0x39
#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -375,6 +376,7 @@ typedef struct _drm_i915_sarea {
#define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
#define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
+#define DRM_IOCTL_I915_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
/* Allow drivers to submit batchbuffers directly to hardware, relying
* on the security mechanisms provided by hardware.
@@ -1358,7 +1360,9 @@ struct drm_intel_overlay_attrs {
* active on a given plane.
*/
-#define I915_SET_COLORKEY_NONE (1<<0) /* disable color key matching */
+#define I915_SET_COLORKEY_NONE (1<<0) /* Deprecated. Instead set
+ * flags==0 to disable colorkeying.
+ */
#define I915_SET_COLORKEY_DESTINATION (1<<1)
#define I915_SET_COLORKEY_SOURCE (1<<2)
struct drm_intel_sprite_colorkey {
@@ -1604,15 +1608,115 @@ struct drm_i915_perf_oa_config {
__u32 n_flex_regs;
/*
- * These fields are pointers to tuples of u32 values (register
- * address, value). For example the expected length of the buffer
- * pointed by mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs).
+ * These fields are pointers to tuples of u32 values (register address,
+ * value). For example the expected length of the buffer pointed by
+ * mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs).
*/
__u64 mux_regs_ptr;
__u64 boolean_regs_ptr;
__u64 flex_regs_ptr;
};
+struct drm_i915_query_item {
+ __u64 query_id;
+#define DRM_I915_QUERY_TOPOLOGY_INFO 1
+
+ /*
+ * When set to zero by userspace, this is filled with the size of the
+ * data to be written at the data_ptr pointer. The kernel sets this
+ * value to a negative value to signal an error on a particular query
+ * item.
+ */
+ __s32 length;
+
+ /*
+ * Unused for now. Must be cleared to zero.
+ */
+ __u32 flags;
+
+ /*
+ * Data will be written at the location pointed by data_ptr when the
+ * value of length matches the length of the data to be written by the
+ * kernel.
+ */
+ __u64 data_ptr;
+};
+
+struct drm_i915_query {
+ __u32 num_items;
+
+ /*
+ * Unused for now. Must be cleared to zero.
+ */
+ __u32 flags;
+
+ /*
+ * This points to an array of num_items drm_i915_query_item structures.
+ */
+ __u64 items_ptr;
+};
+
+/*
+ * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO :
+ *
+ * data: contains the 3 pieces of information :
+ *
+ * - the slice mask with one bit per slice telling whether a slice is
+ * available. The availability of slice X can be queried with the following
+ * formula :
+ *
+ * (data[X / 8] >> (X % 8)) & 1
+ *
+ * - the subslice mask for each slice with one bit per subslice telling
+ * whether a subslice is available. The availability of subslice Y in slice
+ * X can be queried with the following formula :
+ *
+ * (data[subslice_offset +
+ * X * subslice_stride +
+ * Y / 8] >> (Y % 8)) & 1
+ *
+ * - the EU mask for each subslice in each slice with one bit per EU telling
+ * whether an EU is available. The availability of EU Z in subslice Y in
+ * slice X can be queried with the following formula :
+ *
+ * (data[eu_offset +
+ * (X * max_subslices + Y) * eu_stride +
+ * Z / 8] >> (Z % 8)) & 1
+ */
+struct drm_i915_query_topology_info {
+ /*
+ * Unused for now. Must be cleared to zero.
+ */
+ __u16 flags;
+
+ __u16 max_slices;
+ __u16 max_subslices;
+ __u16 max_eus_per_subslice;
+
+ /*
+ * Offset in data[] at which the subslice masks are stored.
+ */
+ __u16 subslice_offset;
+
+ /*
+ * Stride at which each of the subslice masks for each slice are
+ * stored.
+ */
+ __u16 subslice_stride;
+
+ /*
+ * Offset in data[] at which the EU masks are stored.
+ */
+ __u16 eu_offset;
+
+ /*
+ * Stride at which each of the EU masks for each subslice are stored.
+ */
+ __u16 eu_stride;
+
+ __u8 data[];
+};
+
#if defined(__cplusplus)
}
#endif
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9d07465023a2..c5ec89732a8d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -864,6 +864,7 @@ enum bpf_func_id {
/* BPF_FUNC_skb_set_tunnel_key flags. */
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2)
+#define BPF_F_SEQ_NUMBER (1ULL << 3)
/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
* BPF_FUNC_perf_event_read_value flags.
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 6d9447700e18..68699f654118 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -941,4 +941,43 @@ enum {
IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */
};
+/* tun section */
+
+enum {
+ IFLA_TUN_UNSPEC,
+ IFLA_TUN_OWNER,
+ IFLA_TUN_GROUP,
+ IFLA_TUN_TYPE,
+ IFLA_TUN_PI,
+ IFLA_TUN_VNET_HDR,
+ IFLA_TUN_PERSIST,
+ IFLA_TUN_MULTI_QUEUE,
+ IFLA_TUN_NUM_QUEUES,
+ IFLA_TUN_NUM_DISABLED_QUEUES,
+ __IFLA_TUN_MAX,
+};
+
+#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
+
+/* rmnet section */
+
+#define RMNET_FLAGS_INGRESS_DEAGGREGATION (1U << 0)
+#define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3)
+
+enum {
+ IFLA_RMNET_UNSPEC,
+ IFLA_RMNET_MUX_ID,
+ IFLA_RMNET_FLAGS,
+ __IFLA_RMNET_MAX,
+};
+
+#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1)
+
+struct ifla_rmnet_flags {
+ __u32 flags;
+ __u32 mask;
+};
+
#endif /* _UAPI_LINUX_IF_LINK_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 7b26d4b0b052..1065006c9bf5 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -396,6 +396,10 @@ struct kvm_run {
char padding[256];
};
+ /* 2048 is the size of the char array used to bound/pad the size
+ * of the union that holds sync regs.
+ */
+ #define SYNC_REGS_SIZE_BYTES 2048
/*
* shared registers between kvm and userspace.
* kvm_valid_regs specifies the register classes set by the host
@@ -407,7 +411,7 @@ struct kvm_run {
__u64 kvm_dirty_regs;
union {
struct kvm_sync_regs regs;
- char padding[2048];
+ char padding[SYNC_REGS_SIZE_BYTES];
} s;
};
@@ -925,7 +929,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_GS 140
#define KVM_CAP_S390_AIS 141
#define KVM_CAP_SPAPR_TCE_VFIO 142
-#define KVM_CAP_X86_GUEST_MWAIT 143
+#define KVM_CAP_X86_DISABLE_EXITS 143
#define KVM_CAP_ARM_USER_IRQ 144
#define KVM_CAP_S390_CMMA_MIGRATION 145
#define KVM_CAP_PPC_FWNMI 146
@@ -936,6 +940,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PPC_GET_CPU_CHAR 151
#define KVM_CAP_S390_BPB 152
#define KVM_CAP_GET_MSR_FEATURES 153
+#define KVM_CAP_HYPERV_EVENTFD 154
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1375,6 +1380,10 @@ struct kvm_enc_region {
#define KVM_MEMORY_ENCRYPT_REG_REGION _IOR(KVMIO, 0xbb, struct kvm_enc_region)
#define KVM_MEMORY_ENCRYPT_UNREG_REGION _IOR(KVMIO, 0xbc, struct kvm_enc_region)
+/* Available with KVM_CAP_HYPERV_EVENTFD */
+#define KVM_HYPERV_EVENTFD _IOW(KVMIO, 0xbd, struct kvm_hyperv_eventfd)
+
+
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
/* Guest initialization commands */
@@ -1515,4 +1524,14 @@ struct kvm_assigned_msix_entry {
#define KVM_ARM_DEV_EL1_PTIMER (1 << 1)
#define KVM_ARM_DEV_PMU (1 << 2)
+struct kvm_hyperv_eventfd {
+ __u32 conn_id;
+ __s32 fd;
+ __u32 flags;
+ __u32 padding[3];
+};
+
+#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff
+#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0)
+
#endif /* __LINUX_KVM_H */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 912b85b52344..b8e288a1f740 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -650,11 +650,23 @@ struct perf_event_mmap_page {
#define PERF_RECORD_MISC_COMM_EXEC (1 << 13)
#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13)
/*
- * Indicates that the content of PERF_SAMPLE_IP points to
- * the actual instruction that triggered the event. See also
- * perf_event_attr::precise_ip.
+ * These PERF_RECORD_MISC_* flags below are safely reused
+ * for the following events:
+ *
+ * PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events
+ * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events
+ *
+ *
+ * PERF_RECORD_MISC_EXACT_IP:
+ * Indicates that the content of PERF_SAMPLE_IP points to
+ * the actual instruction that triggered the event. See also
+ * perf_event_attr::precise_ip.
+ *
+ * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT:
+ * Indicates that thread was preempted in TASK_RUNNING state.
*/
#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
+#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14)
/*
* Reserve the last bit to indicate some extended misc field
*/
diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
index 07d61583fd02..ed0a120d4f08 100644
--- a/tools/include/uapi/sound/asound.h
+++ b/tools/include/uapi/sound/asound.h
@@ -242,6 +242,7 @@ typedef int __bitwise snd_pcm_format_t;
#define SNDRV_PCM_FORMAT_DSD_U16_BE ((__force snd_pcm_format_t) 51) /* DSD, 2-byte samples DSD (x16), big endian */
#define SNDRV_PCM_FORMAT_DSD_U32_BE ((__force snd_pcm_format_t) 52) /* DSD, 4-byte samples DSD (x32), big endian */
#define SNDRV_PCM_FORMAT_LAST SNDRV_PCM_FORMAT_DSD_U32_BE
+#define SNDRV_PCM_FORMAT_FIRST SNDRV_PCM_FORMAT_S8
#ifdef SNDRV_LITTLE_ENDIAN
#define SNDRV_PCM_FORMAT_S16 SNDRV_PCM_FORMAT_S16_LE
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 5898c22ba310..56c4b3f8a01b 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -1121,9 +1121,6 @@ class Tui(object):
self.screen.refresh()
def _refresh_body(self, sleeptime):
- def is_child_field(field):
- return field.find('(') != -1
-
def insert_child(sorted_items, child, values, parent):
num = len(sorted_items)
for i in range(0, num):
@@ -1134,12 +1131,14 @@ class Tui(object):
def get_sorted_events(self, stats):
""" separate parent and child events """
if self._sorting == SORT_DEFAULT:
- def sortkey((_k, v)):
+ def sortkey(pair):
# sort by (delta value, overall value)
+ v = pair[1]
return (v.delta, v.value)
else:
- def sortkey((_k, v)):
+ def sortkey(pair):
# sort by overall value
+ v = pair[1]
return v.value
childs = []
@@ -1613,7 +1612,7 @@ def assign_globals():
global PATH_DEBUGFS_TRACING
debugfs = ''
- for line in file('/proc/mounts'):
+ for line in open('/proc/mounts'):
if line.split(' ')[0] == 'debugfs':
debugfs = line.split(' ')[1]
break
diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c
index f6a1babcbac4..cb7154eccbdc 100644
--- a/tools/lib/subcmd/parse-options.c
+++ b/tools/lib/subcmd/parse-options.c
@@ -433,7 +433,7 @@ match:
if (ambiguous_option) {
fprintf(stderr,
- " Error: Ambiguous option: %s (could be --%s%s or --%s%s)",
+ " Error: Ambiguous option: %s (could be --%s%s or --%s%s)\n",
arg,
(ambiguous_flags & OPT_UNSET) ? "no-" : "",
ambiguous_option->long_name,
@@ -458,7 +458,7 @@ static void check_typos(const char *arg, const struct option *options)
return;
if (strstarts(arg, "no-")) {
- fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg);
+ fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)\n", arg);
exit(129);
}
@@ -466,7 +466,7 @@ static void check_typos(const char *arg, const struct option *options)
if (!options->long_name)
continue;
if (strstarts(options->long_name, arg)) {
- fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg);
+ fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)\n", arg);
exit(129);
}
}
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index e6acc281dd37..f76d9914686a 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -31,11 +31,11 @@ INCLUDES := -I$(srctree)/tools/include \
-I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
-I$(srctree)/tools/objtool/arch/$(ARCH)/include
WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed
-CFLAGS += -Wall -Werror $(WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES)
-LDFLAGS += -lelf $(LIBSUBCMD)
+CFLAGS += -Werror $(WARNINGS) $(HOSTCFLAGS) -g $(INCLUDES)
+LDFLAGS += -lelf $(LIBSUBCMD) $(HOSTLDFLAGS)
# Allow old libelf to be used:
-elfshdr := $(shell echo '\#include <libelf.h>' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr)
+elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr)
CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
AWK = awk
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index 5b4fff3adc4b..32f4a898e3f2 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -334,6 +334,11 @@ annotate.*::
99.93 │ mov %eax,%eax
+ annotate.offset_level::
+ Default is '1', meaning just jump targets will have offsets show right beside
+ the instruction. When set to '2' 'call' instructions will also have its offsets
+ shown, 3 or higher will show offsets for all instructions.
+
hist.*::
hist.percentage::
This option control the way to calculate overhead of filtered entries -
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index b0211410969b..f8d2167cf3e7 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -28,29 +28,46 @@ OPTIONS
<command>...::
Any command you can specify in a shell.
+-i::
+--input=<file>::
+ Input file name.
+
-f::
--force::
Don't do ownership validation
-t::
---type=::
+--type=<type>::
Select the memory operation type: load or store (default: load,store)
-D::
---dump-raw-samples=::
+--dump-raw-samples::
Dump the raw decoded samples on the screen in a format that is easy to parse with
one sample per line.
-x::
---field-separator::
+--field-separator=<separator>::
Specify the field separator used when dump raw samples (-D option). By default,
The separator is the space character.
-C::
---cpu-list::
- Restrict dump of raw samples to those provided via this option. Note that the same
- option can be passed in record mode. It will be interpreted the same way as perf
- record.
+--cpu=<cpu>::
+ Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
+ comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. Default
+ is to monitor all CPUS.
+-U::
+--hide-unresolved::
+ Only display entries resolved to a symbol.
+
+-p::
+--phys-data::
+ Record/Report sample physical addresses
+
+RECORD OPTIONS
+--------------
+-e::
+--event <event>::
+ Event selector. Use 'perf mem record -e list' to list available events.
-K::
--all-kernel::
@@ -60,12 +77,15 @@ OPTIONS
--all-user::
Configure all used events to run in user space.
---ldload::
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc)
+
+--ldlat <n>::
Specify desired latency for loads event.
--p::
---phys-data::
- Record/Report sample physical addresses
+In addition, for report all perf report options are valid, and for record
+all perf record options.
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index e1a660e60849..917e36fde6d8 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -80,6 +80,7 @@ OPTIONS
- comm: command (name) of the task which can be read via /proc/<pid>/comm
- pid: command and tid of the task
- dso: name of library or module executed at the time of sample
+ - dso_size: size of library or module executed at the time of sample
- symbol: name of function executed at the time of sample
- symbol_size: size of function executed at the time of sample
- parent: name of function matched to the parent regex filter. Unmatched
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index bb33601a823b..63f938b887dd 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -104,8 +104,8 @@ OPTIONS for 'perf sched timehist'
kallsyms pathname
-g::
---no-call-graph::
- Do not display call chains if present.
+--call-graph::
+ Display call chains if present (default on).
--max-stack::
Maximum number of functions to display in backtrace, default 5.
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 36ec0257f8d3..afdafe2110a1 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -228,14 +228,15 @@ OPTIONS
For sample events it's possible to display misc field with -F +misc option,
following letters are displayed for each bit:
- PERF_RECORD_MISC_KERNEL K
- PERF_RECORD_MISC_USER U
- PERF_RECORD_MISC_HYPERVISOR H
- PERF_RECORD_MISC_GUEST_KERNEL G
- PERF_RECORD_MISC_GUEST_USER g
- PERF_RECORD_MISC_MMAP_DATA* M
- PERF_RECORD_MISC_COMM_EXEC E
- PERF_RECORD_MISC_SWITCH_OUT S
+ PERF_RECORD_MISC_KERNEL K
+ PERF_RECORD_MISC_USER U
+ PERF_RECORD_MISC_HYPERVISOR H
+ PERF_RECORD_MISC_GUEST_KERNEL G
+ PERF_RECORD_MISC_GUEST_USER g
+ PERF_RECORD_MISC_MMAP_DATA* M
+ PERF_RECORD_MISC_COMM_EXEC E
+ PERF_RECORD_MISC_SWITCH_OUT S
+ PERF_RECORD_MISC_SWITCH_OUT_PREEMPT Sp
$ perf script -F +misc ...
sched-messaging 1414 K 28690.636582: 4590 cycles ...
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index f15b306be183..e6c3b4e555c2 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -153,7 +153,7 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m
-I msecs::
--interval-print msecs::
-Print count deltas every N milliseconds (minimum: 10ms)
+Print count deltas every N milliseconds (minimum: 1ms)
The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution.
example: 'perf stat -I 1000 -e cycles -a sleep 5'
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 5a7035c5c523..115db9e06ecd 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -117,6 +117,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
--sched::
Accrue thread runtime and provide a summary at the end of the session.
+--failure::
+ Show only syscalls that failed, i.e. that returned < 0.
+
-i::
--input::
Process events from a given perf data file.
diff --git a/tools/perf/Documentation/perf-version.txt b/tools/perf/Documentation/perf-version.txt
new file mode 100644
index 000000000000..e207b7cfca26
--- /dev/null
+++ b/tools/perf/Documentation/perf-version.txt
@@ -0,0 +1,24 @@
+perf-version(1)
+===============
+
+NAME
+----
+perf-version - display the version of perf binary
+
+SYNOPSIS
+--------
+'perf version' [--build-options]
+
+DESCRIPTION
+-----------
+With no options given, the 'perf version' prints the perf version
+on the standard output.
+
+If the option '--build-options' is given, then the status of
+compiled-in libraries are printed on the standard output.
+
+OPTIONS
+-------
+--build-options::
+ Prints the status of compiled-in libraries on the
+ standard output.
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 98ff73648b51..ae7dc46e8f8a 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -68,7 +68,7 @@ ifeq ($(NO_PERF_REGS),0)
endif
ifneq ($(NO_SYSCALL_TABLE),1)
- CFLAGS += -DHAVE_SYSCALL_TABLE
+ CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT
endif
# So far there's only x86 and arm libdw unwind support merged in perf.
@@ -346,12 +346,16 @@ else
ifneq ($(feature-dwarf_getlocations), 1)
msg := $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.157);
else
- CFLAGS += -DHAVE_DWARF_GETLOCATIONS
+ CFLAGS += -DHAVE_DWARF_GETLOCATIONS_SUPPORT
endif # dwarf_getlocations
endif # Dwarf support
endif # libelf support
endif # NO_LIBELF
+ifeq ($(feature-glibc), 1)
+ CFLAGS += -DHAVE_GLIBC_SUPPORT
+endif
+
ifdef NO_DWARF
NO_LIBDW_DWARF_UNWIND := 1
endif
@@ -635,6 +639,7 @@ else
else
LDFLAGS += $(PERL_EMBED_LDFLAGS)
EXTLIBS += $(PERL_EMBED_LIBADD)
+ CFLAGS += -DHAVE_LIBPERL_SUPPORT
$(call detected,CONFIG_LIBPERL)
endif
endif
@@ -671,6 +676,7 @@ else
LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
EXTLIBS += $(PYTHON_EMBED_LIBADD)
LANG_BINDINGS += $(obj-perf)python/perf.so
+ CFLAGS += -DHAVE_LIBPYTHON_SUPPORT
$(call detected,CONFIG_LIBPYTHON)
endif
endif
@@ -841,7 +847,7 @@ ifndef NO_JVMTI
ifeq ($(feature-jvmti), 1)
$(call detected_var,JDIR)
else
- $(warning No openjdk development package found, please install JDK package)
+ $(warning No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel)
NO_JVMTI := 1
endif
endif
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index f7517e1b73f8..83e453de36f8 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -364,7 +364,8 @@ LIBS = -Wl,--whole-archive $(PERFLIBS) $(EXTRA_PERFLIBS) -Wl,--no-whole-archive
ifeq ($(USE_CLANG), 1)
CLANGLIBS_LIST = AST Basic CodeGen Driver Frontend Lex Tooling Edit Sema Analysis Parse Serialization
- LIBCLANG = $(foreach l,$(CLANGLIBS_LIST),$(wildcard $(shell $(LLVM_CONFIG) --libdir)/libclang$(l).a))
+ CLANGLIBS_NOEXT_LIST = $(foreach l,$(CLANGLIBS_LIST),$(shell $(LLVM_CONFIG) --libdir)/libclang$(l))
+ LIBCLANG = $(foreach l,$(CLANGLIBS_NOEXT_LIST),$(wildcard $(l).a $(l).so))
LIBS += -Wl,--start-group $(LIBCLANG) -Wl,--end-group
endif
diff --git a/tools/perf/arch/arm/include/arch-tests.h b/tools/perf/arch/arm/include/arch-tests.h
new file mode 100644
index 000000000000..90ec4c8cb880
--- /dev/null
+++ b/tools/perf/arch/arm/include/arch-tests.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_TESTS_H
+#define ARCH_TESTS_H
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+struct thread;
+struct perf_sample;
+#endif
+
+extern struct test arch_tests[];
+
+#endif
diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build
index b30eff9bcc83..883c57ff0c08 100644
--- a/tools/perf/arch/arm/tests/Build
+++ b/tools/perf/arch/arm/tests/Build
@@ -1,2 +1,4 @@
libperf-y += regs_load.o
libperf-y += dwarf-unwind.o
+
+libperf-y += arch-tests.o
diff --git a/tools/perf/arch/arm/tests/arch-tests.c b/tools/perf/arch/arm/tests/arch-tests.c
new file mode 100644
index 000000000000..5b1543c98022
--- /dev/null
+++ b/tools/perf/arch/arm/tests/arch-tests.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+struct test arch_tests[] = {
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+ {
+ .desc = "DWARF unwind",
+ .func = test__dwarf_unwind,
+ },
+#endif
+ {
+ .func = NULL,
+ },
+};
diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
index fa639e3e52ac..1ce6bdbda561 100644
--- a/tools/perf/arch/arm/util/auxtrace.c
+++ b/tools/perf/arch/arm/util/auxtrace.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright(C) 2015 Linaro Limited. All rights reserved.
* Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdbool.h>
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 5c655ad4621e..2f595cd73da6 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright(C) 2015 Linaro Limited. All rights reserved.
* Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <api/fs/fs.h>
diff --git a/tools/perf/arch/arm/util/cs-etm.h b/tools/perf/arch/arm/util/cs-etm.h
index 5256741be549..1a12e64f5127 100644
--- a/tools/perf/arch/arm/util/cs-etm.h
+++ b/tools/perf/arch/arm/util/cs-etm.h
@@ -1,18 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright(C) 2015 Linaro Limited. All rights reserved.
* Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDE__PERF_CS_ETM_H__
diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
index ac4dffc807b8..e047571e6080 100644
--- a/tools/perf/arch/arm/util/pmu.c
+++ b/tools/perf/arch/arm/util/pmu.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright(C) 2015 Linaro Limited. All rights reserved.
* Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <string.h>
diff --git a/tools/perf/arch/s390/util/auxtrace.c b/tools/perf/arch/s390/util/auxtrace.c
index 6cb48e4cffd9..3afe8256eff2 100644
--- a/tools/perf/arch/s390/util/auxtrace.c
+++ b/tools/perf/arch/s390/util/auxtrace.c
@@ -87,6 +87,7 @@ struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
struct perf_evsel *pos;
int diagnose = 0;
+ *err = 0;
if (evlist->nr_entries == 0)
return NULL;
diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c
index a4c30f1c70be..163b92f33998 100644
--- a/tools/perf/arch/s390/util/header.c
+++ b/tools/perf/arch/s390/util/header.c
@@ -146,21 +146,3 @@ char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
zfree(&buf);
return buf;
}
-
-/*
- * Compare the cpuid string returned by get_cpuid() function
- * with the name generated by the jevents file read from
- * pmu-events/arch/s390/mapfile.csv.
- *
- * Parameter mapcpuid is the cpuid as stored in the
- * pmu-events/arch/s390/mapfile.csv. This is just the type number.
- * Parameter cpuid is the cpuid returned by function get_cpuid().
- */
-int strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
-{
- char *cp = strchr(cpuid, ',');
-
- if (cp == NULL)
- return -1;
- return strncmp(cp + 1, mapcpuid, strlen(mapcpuid));
-}
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index d74eaa7aa927..1a38e78117ce 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -21,7 +21,7 @@ _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
$(header): $(sys)/syscall_64.tbl $(systbl)
@(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
(diff -B arch/x86/entry/syscalls/syscall_64.tbl ../../arch/x86/entry/syscalls/syscall_64.tbl >/dev/null) \
- || echo "Warning: Kernel ABI header at 'tools/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'" >&2 )) || true
+ || echo "Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'" >&2 )) || true
$(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
clean::
diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
index 5bd1ba8c0282..44f5aba78210 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -1,21 +1,43 @@
// SPDX-License-Identifier: GPL-2.0
static struct ins x86__instructions[] = {
+ { .name = "adc", .ops = &mov_ops, },
+ { .name = "adcb", .ops = &mov_ops, },
+ { .name = "adcl", .ops = &mov_ops, },
{ .name = "add", .ops = &mov_ops, },
{ .name = "addl", .ops = &mov_ops, },
{ .name = "addq", .ops = &mov_ops, },
+ { .name = "addsd", .ops = &mov_ops, },
{ .name = "addw", .ops = &mov_ops, },
{ .name = "and", .ops = &mov_ops, },
+ { .name = "andb", .ops = &mov_ops, },
+ { .name = "andl", .ops = &mov_ops, },
+ { .name = "andpd", .ops = &mov_ops, },
+ { .name = "andps", .ops = &mov_ops, },
+ { .name = "andq", .ops = &mov_ops, },
+ { .name = "andw", .ops = &mov_ops, },
+ { .name = "bsr", .ops = &mov_ops, },
+ { .name = "bt", .ops = &mov_ops, },
+ { .name = "btr", .ops = &mov_ops, },
{ .name = "bts", .ops = &mov_ops, },
+ { .name = "btsq", .ops = &mov_ops, },
{ .name = "call", .ops = &call_ops, },
{ .name = "callq", .ops = &call_ops, },
+ { .name = "cmovbe", .ops = &mov_ops, },
+ { .name = "cmove", .ops = &mov_ops, },
+ { .name = "cmovae", .ops = &mov_ops, },
{ .name = "cmp", .ops = &mov_ops, },
{ .name = "cmpb", .ops = &mov_ops, },
{ .name = "cmpl", .ops = &mov_ops, },
{ .name = "cmpq", .ops = &mov_ops, },
{ .name = "cmpw", .ops = &mov_ops, },
{ .name = "cmpxch", .ops = &mov_ops, },
+ { .name = "cmpxchg", .ops = &mov_ops, },
+ { .name = "cs", .ops = &mov_ops, },
{ .name = "dec", .ops = &dec_ops, },
{ .name = "decl", .ops = &dec_ops, },
+ { .name = "divsd", .ops = &mov_ops, },
+ { .name = "divss", .ops = &mov_ops, },
+ { .name = "gs", .ops = &mov_ops, },
{ .name = "imul", .ops = &mov_ops, },
{ .name = "inc", .ops = &dec_ops, },
{ .name = "incl", .ops = &dec_ops, },
@@ -57,25 +79,68 @@ static struct ins x86__instructions[] = {
{ .name = "lea", .ops = &mov_ops, },
{ .name = "lock", .ops = &lock_ops, },
{ .name = "mov", .ops = &mov_ops, },
+ { .name = "movapd", .ops = &mov_ops, },
+ { .name = "movaps", .ops = &mov_ops, },
{ .name = "movb", .ops = &mov_ops, },
{ .name = "movdqa", .ops = &mov_ops, },
+ { .name = "movdqu", .ops = &mov_ops, },
{ .name = "movl", .ops = &mov_ops, },
{ .name = "movq", .ops = &mov_ops, },
+ { .name = "movsd", .ops = &mov_ops, },
{ .name = "movslq", .ops = &mov_ops, },
+ { .name = "movss", .ops = &mov_ops, },
+ { .name = "movupd", .ops = &mov_ops, },
+ { .name = "movups", .ops = &mov_ops, },
+ { .name = "movw", .ops = &mov_ops, },
{ .name = "movzbl", .ops = &mov_ops, },
{ .name = "movzwl", .ops = &mov_ops, },
+ { .name = "mulsd", .ops = &mov_ops, },
+ { .name = "mulss", .ops = &mov_ops, },
{ .name = "nop", .ops = &nop_ops, },
{ .name = "nopl", .ops = &nop_ops, },
{ .name = "nopw", .ops = &nop_ops, },
{ .name = "or", .ops = &mov_ops, },
+ { .name = "orb", .ops = &mov_ops, },
{ .name = "orl", .ops = &mov_ops, },
+ { .name = "orps", .ops = &mov_ops, },
+ { .name = "orq", .ops = &mov_ops, },
+ { .name = "pand", .ops = &mov_ops, },
+ { .name = "paddq", .ops = &mov_ops, },
+ { .name = "pcmpeqb", .ops = &mov_ops, },
+ { .name = "por", .ops = &mov_ops, },
+ { .name = "rclb", .ops = &mov_ops, },
+ { .name = "rcll", .ops = &mov_ops, },
+ { .name = "retq", .ops = &ret_ops, },
+ { .name = "sbb", .ops = &mov_ops, },
+ { .name = "sbbl", .ops = &mov_ops, },
+ { .name = "sete", .ops = &mov_ops, },
+ { .name = "sub", .ops = &mov_ops, },
+ { .name = "subl", .ops = &mov_ops, },
+ { .name = "subq", .ops = &mov_ops, },
+ { .name = "subsd", .ops = &mov_ops, },
+ { .name = "subw", .ops = &mov_ops, },
{ .name = "test", .ops = &mov_ops, },
{ .name = "testb", .ops = &mov_ops, },
{ .name = "testl", .ops = &mov_ops, },
+ { .name = "ucomisd", .ops = &mov_ops, },
+ { .name = "ucomiss", .ops = &mov_ops, },
+ { .name = "vaddsd", .ops = &mov_ops, },
+ { .name = "vandpd", .ops = &mov_ops, },
+ { .name = "vmovdqa", .ops = &mov_ops, },
+ { .name = "vmovq", .ops = &mov_ops, },
+ { .name = "vmovsd", .ops = &mov_ops, },
+ { .name = "vmulsd", .ops = &mov_ops, },
+ { .name = "vorpd", .ops = &mov_ops, },
+ { .name = "vsubsd", .ops = &mov_ops, },
+ { .name = "vucomisd", .ops = &mov_ops, },
{ .name = "xadd", .ops = &mov_ops, },
{ .name = "xbeginl", .ops = &jump_ops, },
{ .name = "xbeginq", .ops = &jump_ops, },
- { .name = "retq", .ops = &ret_ops, },
+ { .name = "xchg", .ops = &mov_ops, },
+ { .name = "xor", .ops = &mov_ops, },
+ { .name = "xorb", .ops = &mov_ops, },
+ { .name = "xorpd", .ops = &mov_ops, },
+ { .name = "xorps", .ops = &mov_ops, },
};
static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 5aef183e2f85..4dfe42666d0c 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -4,379 +4,383 @@
# The format is:
# <number> <abi> <name> <entry point>
#
+# The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls
+#
# The abi is "common", "64" or "x32" for this file.
#
-0 common read sys_read
-1 common write sys_write
-2 common open sys_open
-3 common close sys_close
-4 common stat sys_newstat
-5 common fstat sys_newfstat
-6 common lstat sys_newlstat
-7 common poll sys_poll
-8 common lseek sys_lseek
-9 common mmap sys_mmap
-10 common mprotect sys_mprotect
-11 common munmap sys_munmap
-12 common brk sys_brk
-13 64 rt_sigaction sys_rt_sigaction
-14 common rt_sigprocmask sys_rt_sigprocmask
-15 64 rt_sigreturn sys_rt_sigreturn/ptregs
-16 64 ioctl sys_ioctl
-17 common pread64 sys_pread64
-18 common pwrite64 sys_pwrite64
-19 64 readv sys_readv
-20 64 writev sys_writev
-21 common access sys_access
-22 common pipe sys_pipe
-23 common select sys_select
-24 common sched_yield sys_sched_yield
-25 common mremap sys_mremap
-26 common msync sys_msync
-27 common mincore sys_mincore
-28 common madvise sys_madvise
-29 common shmget sys_shmget
-30 common shmat sys_shmat
-31 common shmctl sys_shmctl
-32 common dup sys_dup
-33 common dup2 sys_dup2
-34 common pause sys_pause
-35 common nanosleep sys_nanosleep
-36 common getitimer sys_getitimer
-37 common alarm sys_alarm
-38 common setitimer sys_setitimer
-39 common getpid sys_getpid
-40 common sendfile sys_sendfile64
-41 common socket sys_socket
-42 common connect sys_connect
-43 common accept sys_accept
-44 common sendto sys_sendto
-45 64 recvfrom sys_recvfrom
-46 64 sendmsg sys_sendmsg
-47 64 recvmsg sys_recvmsg
-48 common shutdown sys_shutdown
-49 common bind sys_bind
-50 common listen sys_listen
-51 common getsockname sys_getsockname
-52 common getpeername sys_getpeername
-53 common socketpair sys_socketpair
-54 64 setsockopt sys_setsockopt
-55 64 getsockopt sys_getsockopt
-56 common clone sys_clone/ptregs
-57 common fork sys_fork/ptregs
-58 common vfork sys_vfork/ptregs
-59 64 execve sys_execve/ptregs
-60 common exit sys_exit
-61 common wait4 sys_wait4
-62 common kill sys_kill
-63 common uname sys_newuname
-64 common semget sys_semget
-65 common semop sys_semop
-66 common semctl sys_semctl
-67 common shmdt sys_shmdt
-68 common msgget sys_msgget
-69 common msgsnd sys_msgsnd
-70 common msgrcv sys_msgrcv
-71 common msgctl sys_msgctl
-72 common fcntl sys_fcntl
-73 common flock sys_flock
-74 common fsync sys_fsync
-75 common fdatasync sys_fdatasync
-76 common truncate sys_truncate
-77 common ftruncate sys_ftruncate
-78 common getdents sys_getdents
-79 common getcwd sys_getcwd
-80 common chdir sys_chdir
-81 common fchdir sys_fchdir
-82 common rename sys_rename
-83 common mkdir sys_mkdir
-84 common rmdir sys_rmdir
-85 common creat sys_creat
-86 common link sys_link
-87 common unlink sys_unlink
-88 common symlink sys_symlink
-89 common readlink sys_readlink
-90 common chmod sys_chmod
-91 common fchmod sys_fchmod
-92 common chown sys_chown
-93 common fchown sys_fchown
-94 common lchown sys_lchown
-95 common umask sys_umask
-96 common gettimeofday sys_gettimeofday
-97 common getrlimit sys_getrlimit
-98 common getrusage sys_getrusage
-99 common sysinfo sys_sysinfo
-100 common times sys_times
-101 64 ptrace sys_ptrace
-102 common getuid sys_getuid
-103 common syslog sys_syslog
-104 common getgid sys_getgid
-105 common setuid sys_setuid
-106 common setgid sys_setgid
-107 common geteuid sys_geteuid
-108 common getegid sys_getegid
-109 common setpgid sys_setpgid
-110 common getppid sys_getppid
-111 common getpgrp sys_getpgrp
-112 common setsid sys_setsid
-113 common setreuid sys_setreuid
-114 common setregid sys_setregid
-115 common getgroups sys_getgroups
-116 common setgroups sys_setgroups
-117 common setresuid sys_setresuid
-118 common getresuid sys_getresuid
-119 common setresgid sys_setresgid
-120 common getresgid sys_getresgid
-121 common getpgid sys_getpgid
-122 common setfsuid sys_setfsuid
-123 common setfsgid sys_setfsgid
-124 common getsid sys_getsid
-125 common capget sys_capget
-126 common capset sys_capset
-127 64 rt_sigpending sys_rt_sigpending
-128 64 rt_sigtimedwait sys_rt_sigtimedwait
-129 64 rt_sigqueueinfo sys_rt_sigqueueinfo
-130 common rt_sigsuspend sys_rt_sigsuspend
-131 64 sigaltstack sys_sigaltstack
-132 common utime sys_utime
-133 common mknod sys_mknod
+0 common read __x64_sys_read
+1 common write __x64_sys_write
+2 common open __x64_sys_open
+3 common close __x64_sys_close
+4 common stat __x64_sys_newstat
+5 common fstat __x64_sys_newfstat
+6 common lstat __x64_sys_newlstat
+7 common poll __x64_sys_poll
+8 common lseek __x64_sys_lseek
+9 common mmap __x64_sys_mmap
+10 common mprotect __x64_sys_mprotect
+11 common munmap __x64_sys_munmap
+12 common brk __x64_sys_brk
+13 64 rt_sigaction __x64_sys_rt_sigaction
+14 common rt_sigprocmask __x64_sys_rt_sigprocmask
+15 64 rt_sigreturn __x64_sys_rt_sigreturn/ptregs
+16 64 ioctl __x64_sys_ioctl
+17 common pread64 __x64_sys_pread64
+18 common pwrite64 __x64_sys_pwrite64
+19 64 readv __x64_sys_readv
+20 64 writev __x64_sys_writev
+21 common access __x64_sys_access
+22 common pipe __x64_sys_pipe
+23 common select __x64_sys_select
+24 common sched_yield __x64_sys_sched_yield
+25 common mremap __x64_sys_mremap
+26 common msync __x64_sys_msync
+27 common mincore __x64_sys_mincore
+28 common madvise __x64_sys_madvise
+29 common shmget __x64_sys_shmget
+30 common shmat __x64_sys_shmat
+31 common shmctl __x64_sys_shmctl
+32 common dup __x64_sys_dup
+33 common dup2 __x64_sys_dup2
+34 common pause __x64_sys_pause
+35 common nanosleep __x64_sys_nanosleep
+36 common getitimer __x64_sys_getitimer
+37 common alarm __x64_sys_alarm
+38 common setitimer __x64_sys_setitimer
+39 common getpid __x64_sys_getpid
+40 common sendfile __x64_sys_sendfile64
+41 common socket __x64_sys_socket
+42 common connect __x64_sys_connect
+43 common accept __x64_sys_accept
+44 common sendto __x64_sys_sendto
+45 64 recvfrom __x64_sys_recvfrom
+46 64 sendmsg __x64_sys_sendmsg
+47 64 recvmsg __x64_sys_recvmsg
+48 common shutdown __x64_sys_shutdown
+49 common bind __x64_sys_bind
+50 common listen __x64_sys_listen
+51 common getsockname __x64_sys_getsockname
+52 common getpeername __x64_sys_getpeername
+53 common socketpair __x64_sys_socketpair
+54 64 setsockopt __x64_sys_setsockopt
+55 64 getsockopt __x64_sys_getsockopt
+56 common clone __x64_sys_clone/ptregs
+57 common fork __x64_sys_fork/ptregs
+58 common vfork __x64_sys_vfork/ptregs
+59 64 execve __x64_sys_execve/ptregs
+60 common exit __x64_sys_exit
+61 common wait4 __x64_sys_wait4
+62 common kill __x64_sys_kill
+63 common uname __x64_sys_newuname
+64 common semget __x64_sys_semget
+65 common semop __x64_sys_semop
+66 common semctl __x64_sys_semctl
+67 common shmdt __x64_sys_shmdt
+68 common msgget __x64_sys_msgget
+69 common msgsnd __x64_sys_msgsnd
+70 common msgrcv __x64_sys_msgrcv
+71 common msgctl __x64_sys_msgctl
+72 common fcntl __x64_sys_fcntl
+73 common flock __x64_sys_flock
+74 common fsync __x64_sys_fsync
+75 common fdatasync __x64_sys_fdatasync
+76 common truncate __x64_sys_truncate
+77 common ftruncate __x64_sys_ftruncate
+78 common getdents __x64_sys_getdents
+79 common getcwd __x64_sys_getcwd
+80 common chdir __x64_sys_chdir
+81 common fchdir __x64_sys_fchdir
+82 common rename __x64_sys_rename
+83 common mkdir __x64_sys_mkdir
+84 common rmdir __x64_sys_rmdir
+85 common creat __x64_sys_creat
+86 common link __x64_sys_link
+87 common unlink __x64_sys_unlink
+88 common symlink __x64_sys_symlink
+89 common readlink __x64_sys_readlink
+90 common chmod __x64_sys_chmod
+91 common fchmod __x64_sys_fchmod
+92 common chown __x64_sys_chown
+93 common fchown __x64_sys_fchown
+94 common lchown __x64_sys_lchown
+95 common umask __x64_sys_umask
+96 common gettimeofday __x64_sys_gettimeofday
+97 common getrlimit __x64_sys_getrlimit
+98 common getrusage __x64_sys_getrusage
+99 common sysinfo __x64_sys_sysinfo
+100 common times __x64_sys_times
+101 64 ptrace __x64_sys_ptrace
+102 common getuid __x64_sys_getuid
+103 common syslog __x64_sys_syslog
+104 common getgid __x64_sys_getgid
+105 common setuid __x64_sys_setuid
+106 common setgid __x64_sys_setgid
+107 common geteuid __x64_sys_geteuid
+108 common getegid __x64_sys_getegid
+109 common setpgid __x64_sys_setpgid
+110 common getppid __x64_sys_getppid
+111 common getpgrp __x64_sys_getpgrp
+112 common setsid __x64_sys_setsid
+113 common setreuid __x64_sys_setreuid
+114 common setregid __x64_sys_setregid
+115 common getgroups __x64_sys_getgroups
+116 common setgroups __x64_sys_setgroups
+117 common setresuid __x64_sys_setresuid
+118 common getresuid __x64_sys_getresuid
+119 common setresgid __x64_sys_setresgid
+120 common getresgid __x64_sys_getresgid
+121 common getpgid __x64_sys_getpgid
+122 common setfsuid __x64_sys_setfsuid
+123 common setfsgid __x64_sys_setfsgid
+124 common getsid __x64_sys_getsid
+125 common capget __x64_sys_capget
+126 common capset __x64_sys_capset
+127 64 rt_sigpending __x64_sys_rt_sigpending
+128 64 rt_sigtimedwait __x64_sys_rt_sigtimedwait
+129 64 rt_sigqueueinfo __x64_sys_rt_sigqueueinfo
+130 common rt_sigsuspend __x64_sys_rt_sigsuspend
+131 64 sigaltstack __x64_sys_sigaltstack
+132 common utime __x64_sys_utime
+133 common mknod __x64_sys_mknod
134 64 uselib
-135 common personality sys_personality
-136 common ustat sys_ustat
-137 common statfs sys_statfs
-138 common fstatfs sys_fstatfs
-139 common sysfs sys_sysfs
-140 common getpriority sys_getpriority
-141 common setpriority sys_setpriority
-142 common sched_setparam sys_sched_setparam
-143 common sched_getparam sys_sched_getparam
-144 common sched_setscheduler sys_sched_setscheduler
-145 common sched_getscheduler sys_sched_getscheduler
-146 common sched_get_priority_max sys_sched_get_priority_max
-147 common sched_get_priority_min sys_sched_get_priority_min
-148 common sched_rr_get_interval sys_sched_rr_get_interval
-149 common mlock sys_mlock
-150 common munlock sys_munlock
-151 common mlockall sys_mlockall
-152 common munlockall sys_munlockall
-153 common vhangup sys_vhangup
-154 common modify_ldt sys_modify_ldt
-155 common pivot_root sys_pivot_root
-156 64 _sysctl sys_sysctl
-157 common prctl sys_prctl
-158 common arch_prctl sys_arch_prctl
-159 common adjtimex sys_adjtimex
-160 common setrlimit sys_setrlimit
-161 common chroot sys_chroot
-162 common sync sys_sync
-163 common acct sys_acct
-164 common settimeofday sys_settimeofday
-165 common mount sys_mount
-166 common umount2 sys_umount
-167 common swapon sys_swapon
-168 common swapoff sys_swapoff
-169 common reboot sys_reboot
-170 common sethostname sys_sethostname
-171 common setdomainname sys_setdomainname
-172 common iopl sys_iopl/ptregs
-173 common ioperm sys_ioperm
+135 common personality __x64_sys_personality
+136 common ustat __x64_sys_ustat
+137 common statfs __x64_sys_statfs
+138 common fstatfs __x64_sys_fstatfs
+139 common sysfs __x64_sys_sysfs
+140 common getpriority __x64_sys_getpriority
+141 common setpriority __x64_sys_setpriority
+142 common sched_setparam __x64_sys_sched_setparam
+143 common sched_getparam __x64_sys_sched_getparam
+144 common sched_setscheduler __x64_sys_sched_setscheduler
+145 common sched_getscheduler __x64_sys_sched_getscheduler
+146 common sched_get_priority_max __x64_sys_sched_get_priority_max
+147 common sched_get_priority_min __x64_sys_sched_get_priority_min
+148 common sched_rr_get_interval __x64_sys_sched_rr_get_interval
+149 common mlock __x64_sys_mlock
+150 common munlock __x64_sys_munlock
+151 common mlockall __x64_sys_mlockall
+152 common munlockall __x64_sys_munlockall
+153 common vhangup __x64_sys_vhangup
+154 common modify_ldt __x64_sys_modify_ldt
+155 common pivot_root __x64_sys_pivot_root
+156 64 _sysctl __x64_sys_sysctl
+157 common prctl __x64_sys_prctl
+158 common arch_prctl __x64_sys_arch_prctl
+159 common adjtimex __x64_sys_adjtimex
+160 common setrlimit __x64_sys_setrlimit
+161 common chroot __x64_sys_chroot
+162 common sync __x64_sys_sync
+163 common acct __x64_sys_acct
+164 common settimeofday __x64_sys_settimeofday
+165 common mount __x64_sys_mount
+166 common umount2 __x64_sys_umount
+167 common swapon __x64_sys_swapon
+168 common swapoff __x64_sys_swapoff
+169 common reboot __x64_sys_reboot
+170 common sethostname __x64_sys_sethostname
+171 common setdomainname __x64_sys_setdomainname
+172 common iopl __x64_sys_iopl/ptregs
+173 common ioperm __x64_sys_ioperm
174 64 create_module
-175 common init_module sys_init_module
-176 common delete_module sys_delete_module
+175 common init_module __x64_sys_init_module
+176 common delete_module __x64_sys_delete_module
177 64 get_kernel_syms
178 64 query_module
-179 common quotactl sys_quotactl
+179 common quotactl __x64_sys_quotactl
180 64 nfsservctl
181 common getpmsg
182 common putpmsg
183 common afs_syscall
184 common tuxcall
185 common security
-186 common gettid sys_gettid
-187 common readahead sys_readahead
-188 common setxattr sys_setxattr
-189 common lsetxattr sys_lsetxattr
-190 common fsetxattr sys_fsetxattr
-191 common getxattr sys_getxattr
-192 common lgetxattr sys_lgetxattr
-193 common fgetxattr sys_fgetxattr
-194 common listxattr sys_listxattr
-195 common llistxattr sys_llistxattr
-196 common flistxattr sys_flistxattr
-197 common removexattr sys_removexattr
-198 common lremovexattr sys_lremovexattr
-199 common fremovexattr sys_fremovexattr
-200 common tkill sys_tkill
-201 common time sys_time
-202 common futex sys_futex
-203 common sched_setaffinity sys_sched_setaffinity
-204 common sched_getaffinity sys_sched_getaffinity
+186 common gettid __x64_sys_gettid
+187 common readahead __x64_sys_readahead
+188 common setxattr __x64_sys_setxattr
+189 common lsetxattr __x64_sys_lsetxattr
+190 common fsetxattr __x64_sys_fsetxattr
+191 common getxattr __x64_sys_getxattr
+192 common lgetxattr __x64_sys_lgetxattr
+193 common fgetxattr __x64_sys_fgetxattr
+194 common listxattr __x64_sys_listxattr
+195 common llistxattr __x64_sys_llistxattr
+196 common flistxattr __x64_sys_flistxattr
+197 common removexattr __x64_sys_removexattr
+198 common lremovexattr __x64_sys_lremovexattr
+199 common fremovexattr __x64_sys_fremovexattr
+200 common tkill __x64_sys_tkill
+201 common time __x64_sys_time
+202 common futex __x64_sys_futex
+203 common sched_setaffinity __x64_sys_sched_setaffinity
+204 common sched_getaffinity __x64_sys_sched_getaffinity
205 64 set_thread_area
-206 64 io_setup sys_io_setup
-207 common io_destroy sys_io_destroy
-208 common io_getevents sys_io_getevents
-209 64 io_submit sys_io_submit
-210 common io_cancel sys_io_cancel
+206 64 io_setup __x64_sys_io_setup
+207 common io_destroy __x64_sys_io_destroy
+208 common io_getevents __x64_sys_io_getevents
+209 64 io_submit __x64_sys_io_submit
+210 common io_cancel __x64_sys_io_cancel
211 64 get_thread_area
-212 common lookup_dcookie sys_lookup_dcookie
-213 common epoll_create sys_epoll_create
+212 common lookup_dcookie __x64_sys_lookup_dcookie
+213 common epoll_create __x64_sys_epoll_create
214 64 epoll_ctl_old
215 64 epoll_wait_old
-216 common remap_file_pages sys_remap_file_pages
-217 common getdents64 sys_getdents64
-218 common set_tid_address sys_set_tid_address
-219 common restart_syscall sys_restart_syscall
-220 common semtimedop sys_semtimedop
-221 common fadvise64 sys_fadvise64
-222 64 timer_create sys_timer_create
-223 common timer_settime sys_timer_settime
-224 common timer_gettime sys_timer_gettime
-225 common timer_getoverrun sys_timer_getoverrun
-226 common timer_delete sys_timer_delete
-227 common clock_settime sys_clock_settime
-228 common clock_gettime sys_clock_gettime
-229 common clock_getres sys_clock_getres
-230 common clock_nanosleep sys_clock_nanosleep
-231 common exit_group sys_exit_group
-232 common epoll_wait sys_epoll_wait
-233 common epoll_ctl sys_epoll_ctl
-234 common tgkill sys_tgkill
-235 common utimes sys_utimes
+216 common remap_file_pages __x64_sys_remap_file_pages
+217 common getdents64 __x64_sys_getdents64
+218 common set_tid_address __x64_sys_set_tid_address
+219 common restart_syscall __x64_sys_restart_syscall
+220 common semtimedop __x64_sys_semtimedop
+221 common fadvise64 __x64_sys_fadvise64
+222 64 timer_create __x64_sys_timer_create
+223 common timer_settime __x64_sys_timer_settime
+224 common timer_gettime __x64_sys_timer_gettime
+225 common timer_getoverrun __x64_sys_timer_getoverrun
+226 common timer_delete __x64_sys_timer_delete
+227 common clock_settime __x64_sys_clock_settime
+228 common clock_gettime __x64_sys_clock_gettime
+229 common clock_getres __x64_sys_clock_getres
+230 common clock_nanosleep __x64_sys_clock_nanosleep
+231 common exit_group __x64_sys_exit_group
+232 common epoll_wait __x64_sys_epoll_wait
+233 common epoll_ctl __x64_sys_epoll_ctl
+234 common tgkill __x64_sys_tgkill
+235 common utimes __x64_sys_utimes
236 64 vserver
-237 common mbind sys_mbind
-238 common set_mempolicy sys_set_mempolicy
-239 common get_mempolicy sys_get_mempolicy
-240 common mq_open sys_mq_open
-241 common mq_unlink sys_mq_unlink
-242 common mq_timedsend sys_mq_timedsend
-243 common mq_timedreceive sys_mq_timedreceive
-244 64 mq_notify sys_mq_notify
-245 common mq_getsetattr sys_mq_getsetattr
-246 64 kexec_load sys_kexec_load
-247 64 waitid sys_waitid
-248 common add_key sys_add_key
-249 common request_key sys_request_key
-250 common keyctl sys_keyctl
-251 common ioprio_set sys_ioprio_set
-252 common ioprio_get sys_ioprio_get
-253 common inotify_init sys_inotify_init
-254 common inotify_add_watch sys_inotify_add_watch
-255 common inotify_rm_watch sys_inotify_rm_watch
-256 common migrate_pages sys_migrate_pages
-257 common openat sys_openat
-258 common mkdirat sys_mkdirat
-259 common mknodat sys_mknodat
-260 common fchownat sys_fchownat
-261 common futimesat sys_futimesat
-262 common newfstatat sys_newfstatat
-263 common unlinkat sys_unlinkat
-264 common renameat sys_renameat
-265 common linkat sys_linkat
-266 common symlinkat sys_symlinkat
-267 common readlinkat sys_readlinkat
-268 common fchmodat sys_fchmodat
-269 common faccessat sys_faccessat
-270 common pselect6 sys_pselect6
-271 common ppoll sys_ppoll
-272 common unshare sys_unshare
-273 64 set_robust_list sys_set_robust_list
-274 64 get_robust_list sys_get_robust_list
-275 common splice sys_splice
-276 common tee sys_tee
-277 common sync_file_range sys_sync_file_range
-278 64 vmsplice sys_vmsplice
-279 64 move_pages sys_move_pages
-280 common utimensat sys_utimensat
-281 common epoll_pwait sys_epoll_pwait
-282 common signalfd sys_signalfd
-283 common timerfd_create sys_timerfd_create
-284 common eventfd sys_eventfd
-285 common fallocate sys_fallocate
-286 common timerfd_settime sys_timerfd_settime
-287 common timerfd_gettime sys_timerfd_gettime
-288 common accept4 sys_accept4
-289 common signalfd4 sys_signalfd4
-290 common eventfd2 sys_eventfd2
-291 common epoll_create1 sys_epoll_create1
-292 common dup3 sys_dup3
-293 common pipe2 sys_pipe2
-294 common inotify_init1 sys_inotify_init1
-295 64 preadv sys_preadv
-296 64 pwritev sys_pwritev
-297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo
-298 common perf_event_open sys_perf_event_open
-299 64 recvmmsg sys_recvmmsg
-300 common fanotify_init sys_fanotify_init
-301 common fanotify_mark sys_fanotify_mark
-302 common prlimit64 sys_prlimit64
-303 common name_to_handle_at sys_name_to_handle_at
-304 common open_by_handle_at sys_open_by_handle_at
-305 common clock_adjtime sys_clock_adjtime
-306 common syncfs sys_syncfs
-307 64 sendmmsg sys_sendmmsg
-308 common setns sys_setns
-309 common getcpu sys_getcpu
-310 64 process_vm_readv sys_process_vm_readv
-311 64 process_vm_writev sys_process_vm_writev
-312 common kcmp sys_kcmp
-313 common finit_module sys_finit_module
-314 common sched_setattr sys_sched_setattr
-315 common sched_getattr sys_sched_getattr
-316 common renameat2 sys_renameat2
-317 common seccomp sys_seccomp
-318 common getrandom sys_getrandom
-319 common memfd_create sys_memfd_create
-320 common kexec_file_load sys_kexec_file_load
-321 common bpf sys_bpf
-322 64 execveat sys_execveat/ptregs
-323 common userfaultfd sys_userfaultfd
-324 common membarrier sys_membarrier
-325 common mlock2 sys_mlock2
-326 common copy_file_range sys_copy_file_range
-327 64 preadv2 sys_preadv2
-328 64 pwritev2 sys_pwritev2
-329 common pkey_mprotect sys_pkey_mprotect
-330 common pkey_alloc sys_pkey_alloc
-331 common pkey_free sys_pkey_free
-332 common statx sys_statx
+237 common mbind __x64_sys_mbind
+238 common set_mempolicy __x64_sys_set_mempolicy
+239 common get_mempolicy __x64_sys_get_mempolicy
+240 common mq_open __x64_sys_mq_open
+241 common mq_unlink __x64_sys_mq_unlink
+242 common mq_timedsend __x64_sys_mq_timedsend
+243 common mq_timedreceive __x64_sys_mq_timedreceive
+244 64 mq_notify __x64_sys_mq_notify
+245 common mq_getsetattr __x64_sys_mq_getsetattr
+246 64 kexec_load __x64_sys_kexec_load
+247 64 waitid __x64_sys_waitid
+248 common add_key __x64_sys_add_key
+249 common request_key __x64_sys_request_key
+250 common keyctl __x64_sys_keyctl
+251 common ioprio_set __x64_sys_ioprio_set
+252 common ioprio_get __x64_sys_ioprio_get
+253 common inotify_init __x64_sys_inotify_init
+254 common inotify_add_watch __x64_sys_inotify_add_watch
+255 common inotify_rm_watch __x64_sys_inotify_rm_watch
+256 common migrate_pages __x64_sys_migrate_pages
+257 common openat __x64_sys_openat
+258 common mkdirat __x64_sys_mkdirat
+259 common mknodat __x64_sys_mknodat
+260 common fchownat __x64_sys_fchownat
+261 common futimesat __x64_sys_futimesat
+262 common newfstatat __x64_sys_newfstatat
+263 common unlinkat __x64_sys_unlinkat
+264 common renameat __x64_sys_renameat
+265 common linkat __x64_sys_linkat
+266 common symlinkat __x64_sys_symlinkat
+267 common readlinkat __x64_sys_readlinkat
+268 common fchmodat __x64_sys_fchmodat
+269 common faccessat __x64_sys_faccessat
+270 common pselect6 __x64_sys_pselect6
+271 common ppoll __x64_sys_ppoll
+272 common unshare __x64_sys_unshare
+273 64 set_robust_list __x64_sys_set_robust_list
+274 64 get_robust_list __x64_sys_get_robust_list
+275 common splice __x64_sys_splice
+276 common tee __x64_sys_tee
+277 common sync_file_range __x64_sys_sync_file_range
+278 64 vmsplice __x64_sys_vmsplice
+279 64 move_pages __x64_sys_move_pages
+280 common utimensat __x64_sys_utimensat
+281 common epoll_pwait __x64_sys_epoll_pwait
+282 common signalfd __x64_sys_signalfd
+283 common timerfd_create __x64_sys_timerfd_create
+284 common eventfd __x64_sys_eventfd
+285 common fallocate __x64_sys_fallocate
+286 common timerfd_settime __x64_sys_timerfd_settime
+287 common timerfd_gettime __x64_sys_timerfd_gettime
+288 common accept4 __x64_sys_accept4
+289 common signalfd4 __x64_sys_signalfd4
+290 common eventfd2 __x64_sys_eventfd2
+291 common epoll_create1 __x64_sys_epoll_create1
+292 common dup3 __x64_sys_dup3
+293 common pipe2 __x64_sys_pipe2
+294 common inotify_init1 __x64_sys_inotify_init1
+295 64 preadv __x64_sys_preadv
+296 64 pwritev __x64_sys_pwritev
+297 64 rt_tgsigqueueinfo __x64_sys_rt_tgsigqueueinfo
+298 common perf_event_open __x64_sys_perf_event_open
+299 64 recvmmsg __x64_sys_recvmmsg
+300 common fanotify_init __x64_sys_fanotify_init
+301 common fanotify_mark __x64_sys_fanotify_mark
+302 common prlimit64 __x64_sys_prlimit64
+303 common name_to_handle_at __x64_sys_name_to_handle_at
+304 common open_by_handle_at __x64_sys_open_by_handle_at
+305 common clock_adjtime __x64_sys_clock_adjtime
+306 common syncfs __x64_sys_syncfs
+307 64 sendmmsg __x64_sys_sendmmsg
+308 common setns __x64_sys_setns
+309 common getcpu __x64_sys_getcpu
+310 64 process_vm_readv __x64_sys_process_vm_readv
+311 64 process_vm_writev __x64_sys_process_vm_writev
+312 common kcmp __x64_sys_kcmp
+313 common finit_module __x64_sys_finit_module
+314 common sched_setattr __x64_sys_sched_setattr
+315 common sched_getattr __x64_sys_sched_getattr
+316 common renameat2 __x64_sys_renameat2
+317 common seccomp __x64_sys_seccomp
+318 common getrandom __x64_sys_getrandom
+319 common memfd_create __x64_sys_memfd_create
+320 common kexec_file_load __x64_sys_kexec_file_load
+321 common bpf __x64_sys_bpf
+322 64 execveat __x64_sys_execveat/ptregs
+323 common userfaultfd __x64_sys_userfaultfd
+324 common membarrier __x64_sys_membarrier
+325 common mlock2 __x64_sys_mlock2
+326 common copy_file_range __x64_sys_copy_file_range
+327 64 preadv2 __x64_sys_preadv2
+328 64 pwritev2 __x64_sys_pwritev2
+329 common pkey_mprotect __x64_sys_pkey_mprotect
+330 common pkey_alloc __x64_sys_pkey_alloc
+331 common pkey_free __x64_sys_pkey_free
+332 common statx __x64_sys_statx
#
# x32-specific system call numbers start at 512 to avoid cache impact
-# for native 64-bit operation.
+# for native 64-bit operation. The __x32_compat_sys stubs are created
+# on-the-fly for compat_sys_*() compatibility system calls if X86_X32
+# is defined.
#
-512 x32 rt_sigaction compat_sys_rt_sigaction
+512 x32 rt_sigaction __x32_compat_sys_rt_sigaction
513 x32 rt_sigreturn sys32_x32_rt_sigreturn
-514 x32 ioctl compat_sys_ioctl
-515 x32 readv compat_sys_readv
-516 x32 writev compat_sys_writev
-517 x32 recvfrom compat_sys_recvfrom
-518 x32 sendmsg compat_sys_sendmsg
-519 x32 recvmsg compat_sys_recvmsg
-520 x32 execve compat_sys_execve/ptregs
-521 x32 ptrace compat_sys_ptrace
-522 x32 rt_sigpending compat_sys_rt_sigpending
-523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
-524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo
-525 x32 sigaltstack compat_sys_sigaltstack
-526 x32 timer_create compat_sys_timer_create
-527 x32 mq_notify compat_sys_mq_notify
-528 x32 kexec_load compat_sys_kexec_load
-529 x32 waitid compat_sys_waitid
-530 x32 set_robust_list compat_sys_set_robust_list
-531 x32 get_robust_list compat_sys_get_robust_list
-532 x32 vmsplice compat_sys_vmsplice
-533 x32 move_pages compat_sys_move_pages
-534 x32 preadv compat_sys_preadv64
-535 x32 pwritev compat_sys_pwritev64
-536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
-537 x32 recvmmsg compat_sys_recvmmsg
-538 x32 sendmmsg compat_sys_sendmmsg
-539 x32 process_vm_readv compat_sys_process_vm_readv
-540 x32 process_vm_writev compat_sys_process_vm_writev
-541 x32 setsockopt compat_sys_setsockopt
-542 x32 getsockopt compat_sys_getsockopt
-543 x32 io_setup compat_sys_io_setup
-544 x32 io_submit compat_sys_io_submit
-545 x32 execveat compat_sys_execveat/ptregs
-546 x32 preadv2 compat_sys_preadv64v2
-547 x32 pwritev2 compat_sys_pwritev64v2
+514 x32 ioctl __x32_compat_sys_ioctl
+515 x32 readv __x32_compat_sys_readv
+516 x32 writev __x32_compat_sys_writev
+517 x32 recvfrom __x32_compat_sys_recvfrom
+518 x32 sendmsg __x32_compat_sys_sendmsg
+519 x32 recvmsg __x32_compat_sys_recvmsg
+520 x32 execve __x32_compat_sys_execve/ptregs
+521 x32 ptrace __x32_compat_sys_ptrace
+522 x32 rt_sigpending __x32_compat_sys_rt_sigpending
+523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait
+524 x32 rt_sigqueueinfo __x32_compat_sys_rt_sigqueueinfo
+525 x32 sigaltstack __x32_compat_sys_sigaltstack
+526 x32 timer_create __x32_compat_sys_timer_create
+527 x32 mq_notify __x32_compat_sys_mq_notify
+528 x32 kexec_load __x32_compat_sys_kexec_load
+529 x32 waitid __x32_compat_sys_waitid
+530 x32 set_robust_list __x32_compat_sys_set_robust_list
+531 x32 get_robust_list __x32_compat_sys_get_robust_list
+532 x32 vmsplice __x32_compat_sys_vmsplice
+533 x32 move_pages __x32_compat_sys_move_pages
+534 x32 preadv __x32_compat_sys_preadv64
+535 x32 pwritev __x32_compat_sys_pwritev64
+536 x32 rt_tgsigqueueinfo __x32_compat_sys_rt_tgsigqueueinfo
+537 x32 recvmmsg __x32_compat_sys_recvmmsg
+538 x32 sendmmsg __x32_compat_sys_sendmmsg
+539 x32 process_vm_readv __x32_compat_sys_process_vm_readv
+540 x32 process_vm_writev __x32_compat_sys_process_vm_writev
+541 x32 setsockopt __x32_compat_sys_setsockopt
+542 x32 getsockopt __x32_compat_sys_getsockopt
+543 x32 io_setup __x32_compat_sys_io_setup
+544 x32 io_submit __x32_compat_sys_io_submit
+545 x32 execveat __x32_compat_sys_execveat/ptregs
+546 x32 preadv2 __x32_compat_sys_preadv64v2
+547 x32 pwritev2 __x32_compat_sys_pwritev64v2
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 4aca13f23b9d..1c41b4eaf73c 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -439,7 +439,7 @@ int cmd_help(int argc, const char **argv)
#ifdef HAVE_LIBELF_SUPPORT
"probe",
#endif
-#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE)
+#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)
"trace",
#endif
NULL };
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 506564651cda..57393e94d156 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -83,7 +83,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
};
argc = parse_options(argc, argv, options, record_mem_usage,
- PARSE_OPT_STOP_AT_NON_OPTION);
+ PARSE_OPT_KEEP_UNKNOWN);
rec_argc = argc + 9; /* max number of arguments */
rec_argv = calloc(rec_argc + 1, sizeof(char *));
@@ -436,7 +436,7 @@ int cmd_mem(int argc, const char **argv)
}
argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
- mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
+ mem_usage, PARSE_OPT_KEEP_UNKNOWN);
if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation))
usage_with_options(mem_usage, mem_options);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 313c42423393..e0a9845b6cbc 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -657,8 +657,11 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
break;
case PERF_RECORD_SWITCH:
case PERF_RECORD_SWITCH_CPU_WIDE:
- if (has(SWITCH_OUT))
+ if (has(SWITCH_OUT)) {
ret += fprintf(fp, "S");
+ if (sample->misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT)
+ ret += fprintf(fp, "p");
+ }
default:
break;
}
@@ -2801,11 +2804,11 @@ int find_scripts(char **scripts_array, char **scripts_path_array)
for_each_lang(scripts_path, scripts_dir, lang_dirent) {
scnprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path,
lang_dirent->d_name);
-#ifdef NO_LIBPERL
+#ifndef HAVE_LIBPERL_SUPPORT
if (strstr(lang_path, "perl"))
continue;
#endif
-#ifdef NO_LIBPYTHON
+#ifndef HAVE_LIBPYTHON_SUPPORT
if (strstr(lang_path, "python"))
continue;
#endif
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f5c454855908..f17dc601b0f3 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -172,6 +172,7 @@ static bool interval_count;
static const char *output_name;
static int output_fd;
static int print_free_counters_hint;
+static int print_mixed_hw_group_error;
struct perf_stat {
bool record;
@@ -1126,6 +1127,30 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
}
+static bool is_mixed_hw_group(struct perf_evsel *counter)
+{
+ struct perf_evlist *evlist = counter->evlist;
+ u32 pmu_type = counter->attr.type;
+ struct perf_evsel *pos;
+
+ if (counter->nr_members < 2)
+ return false;
+
+ evlist__for_each_entry(evlist, pos) {
+ /* software events can be part of any hardware group */
+ if (pos->attr.type == PERF_TYPE_SOFTWARE)
+ continue;
+ if (pmu_type == PERF_TYPE_SOFTWARE) {
+ pmu_type = pos->attr.type;
+ continue;
+ }
+ if (pmu_type != pos->attr.type)
+ return true;
+ }
+
+ return false;
+}
+
static void printout(int id, int nr, struct perf_evsel *counter, double uval,
char *prefix, u64 run, u64 ena, double noise,
struct runtime_stat *st)
@@ -1178,8 +1203,11 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
csv_sep);
- if (counter->supported)
+ if (counter->supported) {
print_free_counters_hint = 1;
+ if (is_mixed_hw_group(counter))
+ print_mixed_hw_group_error = 1;
+ }
fprintf(stat_config.output, "%-*s%s",
csv_output ? 0 : unit_width,
@@ -1256,7 +1284,8 @@ static void uniquify_event_name(struct perf_evsel *counter)
char *new_name;
char *config;
- if (!counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
+ if (counter->uniquified_name ||
+ !counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
strlen(counter->pmu_name)))
return;
@@ -1274,6 +1303,8 @@ static void uniquify_event_name(struct perf_evsel *counter)
counter->name = new_name;
}
}
+
+ counter->uniquified_name = true;
}
static void collect_all_aliases(struct perf_evsel *counter,
@@ -1757,6 +1788,11 @@ static void print_footer(void)
" echo 0 > /proc/sys/kernel/nmi_watchdog\n"
" perf stat ...\n"
" echo 1 > /proc/sys/kernel/nmi_watchdog\n");
+
+ if (print_mixed_hw_group_error)
+ fprintf(output,
+ "The events in group usually have to be from "
+ "the same PMU. Try reorganizing the group.\n");
}
static void print_counters(struct timespec *ts, int argc, const char **argv)
@@ -1943,7 +1979,8 @@ static const struct option stat_options[] = {
OPT_STRING(0, "post", &post_cmd, "command",
"command to run after to the measured command"),
OPT_UINTEGER('I', "interval-print", &stat_config.interval,
- "print counts at regular interval in ms (>= 10)"),
+ "print counts at regular interval in ms "
+ "(overhead is possible for values <= 100ms)"),
OPT_INTEGER(0, "interval-count", &stat_config.times,
"print counts for fixed number of times"),
OPT_UINTEGER(0, "timeout", &stat_config.timeout,
@@ -2923,17 +2960,6 @@ int cmd_stat(int argc, const char **argv)
}
}
- if (interval && interval < 100) {
- if (interval < 10) {
- pr_err("print interval must be >= 10ms\n");
- parse_options_usage(stat_usage, stat_options, "I", 1);
- goto out;
- } else
- pr_warning("print interval < 100ms. "
- "The overhead percentage could be high in some cases. "
- "Please proceed with caution.\n");
- }
-
if (stat_config.times && interval)
interval_count = true;
else if (stat_config.times && !interval) {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 87b95c9410b4..3ad17ee89403 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -112,6 +112,7 @@ struct trace {
bool multiple_threads;
bool summary;
bool summary_only;
+ bool failure_only;
bool show_comm;
bool print_sample;
bool show_tool_stats;
@@ -1565,7 +1566,7 @@ static int trace__printf_interrupted_entry(struct trace *trace)
struct thread_trace *ttrace;
size_t printed;
- if (trace->current == NULL)
+ if (trace->failure_only || trace->current == NULL)
return 0;
ttrace = thread__priv(trace->current);
@@ -1638,7 +1639,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
args, trace, thread);
if (sc->is_exit) {
- if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
+ if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
}
@@ -1742,7 +1743,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
}
}
- if (trace->summary_only)
+ if (trace->summary_only || (ret >= 0 && trace->failure_only))
goto out;
trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
@@ -1961,7 +1962,7 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
trace->output);
}
- fprintf(trace->output, ")\n");
+ fprintf(trace->output, "\n");
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
@@ -3087,6 +3088,8 @@ int cmd_trace(int argc, const char **argv)
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
OPT_BOOLEAN('T', "time", &trace.full_time,
"Show full timestamp, not time relative to first start"),
+ OPT_BOOLEAN(0, "failure", &trace.failure_only,
+ "Show only syscalls that failed"),
OPT_BOOLEAN('s', "summary", &trace.summary_only,
"Show only syscall summary with statistics"),
OPT_BOOLEAN('S', "with-summary", &trace.summary,
diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c
index 37019c5d675f..50df168be326 100644
--- a/tools/perf/builtin-version.c
+++ b/tools/perf/builtin-version.c
@@ -1,11 +1,94 @@
// SPDX-License-Identifier: GPL-2.0
#include "builtin.h"
#include "perf.h"
+#include "color.h"
#include <linux/compiler.h>
+#include <tools/config.h>
#include <stdio.h>
+#include <string.h>
+#include <subcmd/parse-options.h>
-int cmd_version(int argc __maybe_unused, const char **argv __maybe_unused)
+int version_verbose;
+
+struct version {
+ bool build_options;
+};
+
+static struct version version;
+
+static struct option version_options[] = {
+ OPT_BOOLEAN(0, "build-options", &version.build_options,
+ "display the build options"),
+};
+
+static const char * const version_usage[] = {
+ "perf version [<options>]",
+ NULL
+};
+
+static void on_off_print(const char *status)
+{
+ printf("[ ");
+
+ if (!strcmp(status, "OFF"))
+ color_fprintf(stdout, PERF_COLOR_RED, "%-3s", status);
+ else
+ color_fprintf(stdout, PERF_COLOR_GREEN, "%-3s", status);
+
+ printf(" ]");
+}
+
+static void status_print(const char *name, const char *macro,
+ const char *status)
{
+ printf("%22s: ", name);
+ on_off_print(status);
+ printf(" # %s\n", macro);
+}
+
+#define STATUS(__d, __m) \
+do { \
+ if (IS_BUILTIN(__d)) \
+ status_print(#__m, #__d, "on"); \
+ else \
+ status_print(#__m, #__d, "OFF"); \
+} while (0)
+
+static void library_status(void)
+{
+ STATUS(HAVE_DWARF_SUPPORT, dwarf);
+ STATUS(HAVE_DWARF_GETLOCATIONS_SUPPORT, dwarf_getlocations);
+ STATUS(HAVE_GLIBC_SUPPORT, glibc);
+ STATUS(HAVE_GTK2_SUPPORT, gtk2);
+#ifndef HAVE_SYSCALL_TABLE_SUPPORT
+ STATUS(HAVE_LIBAUDIT_SUPPORT, libaudit);
+#endif
+ STATUS(HAVE_SYSCALL_TABLE_SUPPORT, syscall_table);
+ STATUS(HAVE_LIBBFD_SUPPORT, libbfd);
+ STATUS(HAVE_LIBELF_SUPPORT, libelf);
+ STATUS(HAVE_LIBNUMA_SUPPORT, libnuma);
+ STATUS(HAVE_LIBNUMA_SUPPORT, numa_num_possible_cpus);
+ STATUS(HAVE_LIBPERL_SUPPORT, libperl);
+ STATUS(HAVE_LIBPYTHON_SUPPORT, libpython);
+ STATUS(HAVE_SLANG_SUPPORT, libslang);
+ STATUS(HAVE_LIBCRYPTO_SUPPORT, libcrypto);
+ STATUS(HAVE_LIBUNWIND_SUPPORT, libunwind);
+ STATUS(HAVE_DWARF_SUPPORT, libdw-dwarf-unwind);
+ STATUS(HAVE_ZLIB_SUPPORT, zlib);
+ STATUS(HAVE_LZMA_SUPPORT, lzma);
+ STATUS(HAVE_AUXTRACE_SUPPORT, get_cpuid);
+ STATUS(HAVE_LIBBPF_SUPPORT, bpf);
+}
+
+int cmd_version(int argc, const char **argv)
+{
+ argc = parse_options(argc, argv, version_options, version_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
printf("perf version %s\n", perf_version_string);
+
+ if (version.build_options || version_verbose == 1)
+ library_status();
+
return 0;
}
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 1b3fc8ec0fa2..20a08cb32332 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -73,7 +73,7 @@ static struct cmd_struct commands[] = {
{ "lock", cmd_lock, 0 },
{ "kvm", cmd_kvm, 0 },
{ "test", cmd_test, 0 },
-#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE)
+#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)
{ "trace", cmd_trace, 0 },
#endif
{ "inject", cmd_inject, 0 },
@@ -190,6 +190,12 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
break;
}
+ if (!strcmp(cmd, "-vv")) {
+ (*argv)[0] = "version";
+ version_verbose = 1;
+ break;
+ }
+
/*
* Check remaining flags.
*/
@@ -485,7 +491,7 @@ int main(int argc, const char **argv)
argv[0] = cmd;
}
if (strstarts(cmd, "trace")) {
-#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE)
+#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)
setup_path();
argv[0] = "trace";
return cmd_trace(argc, argv);
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 8fec1abd0f1f..a1a97956136f 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -84,6 +84,7 @@ struct record_opts {
struct option;
extern const char * const *record_usage;
extern struct option *record_options;
+extern int version_verbose;
int record__parse_freq(const struct option *opt, const char *str, int unset);
#endif
diff --git a/tools/perf/pmu-events/arch/s390/mapfile.csv b/tools/perf/pmu-events/arch/s390/mapfile.csv
index ca7682748a4b..78bcf7f8e206 100644
--- a/tools/perf/pmu-events/arch/s390/mapfile.csv
+++ b/tools/perf/pmu-events/arch/s390/mapfile.csv
@@ -1,6 +1,6 @@
Family-model,Version,Filename,EventType
-209[78],1,cf_z10,core
-281[78],1,cf_z196,core
-282[78],1,cf_zec12,core
-296[45],1,cf_z13,core
-3906,3,cf_z14,core
+^IBM.209[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z10,core
+^IBM.281[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z196,core
+^IBM.282[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_zec12,core
+^IBM.296[45].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z13,core
+^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core
diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling
index f906b793196f..8a33ca4f9e1f 100644
--- a/tools/perf/tests/attr/test-record-group-sampling
+++ b/tools/perf/tests/attr/test-record-group-sampling
@@ -35,3 +35,6 @@ inherit=0
# sampling disabled
sample_freq=0
sample_period=0
+freq=0
+write_backward=0
+sample_id_all=0
diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c
index e4123c1b0e88..1ca5106df5f1 100644
--- a/tools/perf/tests/bpf-script-example.c
+++ b/tools/perf/tests/bpf-script-example.c
@@ -31,7 +31,7 @@ struct bpf_map_def SEC("maps") flip_table = {
.max_entries = 1,
};
-SEC("func=SyS_epoll_pwait")
+SEC("func=do_epoll_wait")
int bpf_func__SyS_epoll_pwait(void *ctx)
{
int ind =0;
diff --git a/tools/perf/tests/bpf-script-test-kbuild.c b/tools/perf/tests/bpf-script-test-kbuild.c
index 3626924740d8..ff3ec8337f0a 100644
--- a/tools/perf/tests/bpf-script-test-kbuild.c
+++ b/tools/perf/tests/bpf-script-test-kbuild.c
@@ -9,7 +9,6 @@
#define SEC(NAME) __attribute__((section(NAME), used))
#include <uapi/linux/fs.h>
-#include <uapi/asm/ptrace.h>
SEC("func=vfs_llseek")
int bpf_func__vfs_llseek(void *ctx)
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 625f5a6772af..cac8f8889bc3 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -118,6 +118,7 @@ static struct test generic_tests[] = {
{
.desc = "Breakpoint accounting",
.func = test__bp_accounting,
+ .is_supported = test__bp_signal_is_supported,
},
{
.desc = "Number of exit events of a simple workload",
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index bb8e6bcb0d96..0919b0793e5b 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -75,7 +75,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
evsels[i] = perf_evsel__newtp("syscalls", name);
if (IS_ERR(evsels[i])) {
- pr_debug("perf_evsel__new\n");
+ pr_debug("perf_evsel__new(%s)\n", name);
goto out_delete_evlist;
}
diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
index 1ecc1f0ff84a..016882dbbc16 100755
--- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -19,12 +19,10 @@ trace_libc_inet_pton_backtrace() {
expected[1]=".*inet_pton[[:space:]]\($libc\)$"
case "$(uname -m)" in
s390x)
- eventattr='call-graph=dwarf'
+ eventattr='call-graph=dwarf,max-stack=4'
expected[2]="gaih_inet.*[[:space:]]\($libc|inlined\)$"
- expected[3]="__GI_getaddrinfo[[:space:]]\($libc|inlined\)$"
+ expected[3]="(__GI_)?getaddrinfo[[:space:]]\($libc|inlined\)$"
expected[4]="main[[:space:]]\(.*/bin/ping.*\)$"
- expected[5]="__libc_start_main[[:space:]]\($libc\)$"
- expected[6]="_start[[:space:]]\(.*/bin/ping.*\)$"
;;
*)
eventattr='max-stack=3'
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
index 417e3ecfe9d7..9f68077b241b 100644
--- a/tools/perf/trace/beauty/mmap.c
+++ b/tools/perf/trace/beauty/mmap.c
@@ -54,6 +54,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
P_MMAP_FLAG(EXECUTABLE);
P_MMAP_FLAG(FILE);
P_MMAP_FLAG(FIXED);
+#ifdef MAP_FIXED_NOREPLACE
+ P_MMAP_FLAG(FIXED_NOREPLACE);
+#endif
P_MMAP_FLAG(GROWSDOWN);
P_MMAP_FLAG(HUGETLB);
P_MMAP_FLAG(LOCKED);
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index 9f6ce29b83b4..4f75561424ed 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -45,11 +45,16 @@ void ui_browser__set_percent_color(struct ui_browser *browser,
ui_browser__set_color(browser, color);
}
-void ui_browser__gotorc(struct ui_browser *browser, int y, int x)
+void ui_browser__gotorc_title(struct ui_browser *browser, int y, int x)
{
SLsmg_gotorc(browser->y + y, browser->x + x);
}
+void ui_browser__gotorc(struct ui_browser *browser, int y, int x)
+{
+ SLsmg_gotorc(browser->y + y + browser->extra_title_lines, browser->x + x);
+}
+
void ui_browser__write_nstring(struct ui_browser *browser __maybe_unused, const char *msg,
unsigned int width)
{
@@ -191,6 +196,7 @@ void ui_browser__refresh_dimensions(struct ui_browser *browser)
{
browser->width = SLtt_Screen_Cols - 1;
browser->height = browser->rows = SLtt_Screen_Rows - 2;
+ browser->rows -= browser->extra_title_lines;
browser->y = 1;
browser->x = 0;
}
@@ -337,8 +343,8 @@ static int __ui_browser__refresh(struct ui_browser *browser)
else
width += 1;
- SLsmg_fill_region(browser->y + row, browser->x,
- browser->height - row, width, ' ');
+ SLsmg_fill_region(browser->y + row + browser->extra_title_lines, browser->x,
+ browser->rows - row, width, ' ');
return 0;
}
diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h
index 70057178ee34..aa5932e1d62e 100644
--- a/tools/perf/ui/browser.h
+++ b/tools/perf/ui/browser.h
@@ -17,6 +17,7 @@ struct ui_browser {
u64 index, top_idx;
void *top, *entries;
u16 y, x, width, height, rows, columns, horiz_scroll;
+ u8 extra_title_lines;
int current_color;
void *priv;
const char *title;
@@ -38,6 +39,7 @@ bool ui_browser__is_current_entry(struct ui_browser *browser, unsigned row);
void ui_browser__refresh_dimensions(struct ui_browser *browser);
void ui_browser__reset_index(struct ui_browser *browser);
+void ui_browser__gotorc_title(struct ui_browser *browser, int y, int x);
void ui_browser__gotorc(struct ui_browser *browser, int y, int x);
void ui_browser__write_nstring(struct ui_browser *browser, const char *msg,
unsigned int width);
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index c02fb437ac8e..3781d74088a7 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -218,7 +218,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser)
annotate_browser__draw_current_jump(browser);
ui_browser__set_color(browser, HE_COLORSET_NORMAL);
- __ui_browser__vline(browser, pcnt_width, 0, browser->height - 1);
+ __ui_browser__vline(browser, pcnt_width, 0, browser->rows - 1);
return ret;
}
@@ -592,21 +592,40 @@ bool annotate_browser__continue_search_reverse(struct annotate_browser *browser,
return __annotate_browser__search_reverse(browser);
}
+static int annotate_browser__show(struct ui_browser *browser, char *title, const char *help)
+{
+ struct map_symbol *ms = browser->priv;
+ struct symbol *sym = ms->sym;
+ char symbol_dso[SYM_TITLE_MAX_SIZE];
+
+ if (ui_browser__show(browser, title, help) < 0)
+ return -1;
+
+ sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso));
+
+ ui_browser__gotorc_title(browser, 0, 0);
+ ui_browser__set_color(browser, HE_COLORSET_ROOT);
+ ui_browser__write_nstring(browser, symbol_dso, browser->width + 1);
+ return 0;
+}
+
static int annotate_browser__run(struct annotate_browser *browser,
struct perf_evsel *evsel,
struct hist_browser_timer *hbt)
{
struct rb_node *nd = NULL;
+ struct hists *hists = evsel__hists(evsel);
struct map_symbol *ms = browser->b.priv;
struct symbol *sym = ms->sym;
struct annotation *notes = symbol__annotation(ms->sym);
const char *help = "Press 'h' for help on key bindings";
int delay_secs = hbt ? hbt->refresh : 0;
+ char title[256];
int key;
- char title[SYM_TITLE_MAX_SIZE];
- sym_title(sym, ms->map, title, sizeof(title));
- if (ui_browser__show(&browser->b, title, help) < 0)
+ annotation__scnprintf_samples_period(notes, title, sizeof(title), evsel);
+
+ if (annotate_browser__show(&browser->b, title, help) < 0)
return -1;
annotate_browser__calc_percent(browser, evsel);
@@ -637,8 +656,11 @@ static int annotate_browser__run(struct annotate_browser *browser,
if (hbt)
hbt->timer(hbt->arg);
- if (delay_secs != 0)
+ if (delay_secs != 0) {
symbol__annotate_decay_histogram(sym, evsel->idx);
+ hists__scnprintf_title(hists, title, sizeof(title));
+ annotate_browser__show(&browser->b, title, help);
+ }
continue;
case K_TAB:
if (nd != NULL) {
@@ -670,6 +692,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
"J Toggle showing number of jump sources on targets\n"
"n Search next string\n"
"o Toggle disassembler output/simplified view\n"
+ "O Bump offset level (jump targets -> +call -> all -> cycle thru)\n"
"s Toggle source code view\n"
"t Circulate percent, total period, samples view\n"
"/ Search string\n"
@@ -697,6 +720,10 @@ static int annotate_browser__run(struct annotate_browser *browser,
notes->options->use_offset = !notes->options->use_offset;
annotation__update_column_widths(notes);
continue;
+ case 'O':
+ if (++notes->options->offset_level > ANNOTATION__MAX_OFFSET_LEVEL)
+ notes->options->offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
+ continue;
case 'j':
notes->options->jump_arrows = !notes->options->jump_arrows;
continue;
@@ -812,6 +839,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
.seek = ui_browser__list_head_seek,
.write = annotate_browser__write,
.filter = disasm_line__filter,
+ .extra_title_lines = 1, /* for hists__scnprintf_title() */
.priv = &ms,
.use_navkeypressed = true,
},
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 8b4e82548f8e..e5f247247daa 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -32,8 +32,7 @@
extern void hist_browser__init_hpp(void);
-static int perf_evsel_browser_title(struct hist_browser *browser,
- char *bf, size_t size);
+static int hists_browser__scnprintf_title(struct hist_browser *browser, char *bf, size_t size);
static void hist_browser__update_nr_entries(struct hist_browser *hb);
static struct rb_node *hists__filter_entries(struct rb_node *nd,
@@ -62,6 +61,15 @@ static int hist_browser__get_folding(struct hist_browser *browser)
return unfolded_rows;
}
+static void hist_browser__set_title_space(struct hist_browser *hb)
+{
+ struct ui_browser *browser = &hb->b;
+ struct hists *hists = hb->hists;
+ struct perf_hpp_list *hpp_list = hists->hpp_list;
+
+ browser->extra_title_lines = hb->show_headers ? hpp_list->nr_header_lines : 0;
+}
+
static u32 hist_browser__nr_entries(struct hist_browser *hb)
{
u32 nr_entries;
@@ -82,10 +90,16 @@ static void hist_browser__update_rows(struct hist_browser *hb)
struct ui_browser *browser = &hb->b;
struct hists *hists = hb->hists;
struct perf_hpp_list *hpp_list = hists->hpp_list;
- u16 header_offset, index_row;
+ u16 index_row;
- header_offset = hb->show_headers ? hpp_list->nr_header_lines : 0;
- browser->rows = browser->height - header_offset;
+ if (!hb->show_headers) {
+ browser->rows += browser->extra_title_lines;
+ browser->extra_title_lines = 0;
+ return;
+ }
+
+ browser->extra_title_lines = hpp_list->nr_header_lines;
+ browser->rows -= browser->extra_title_lines;
/*
* Verify if we were at the last line and that line isn't
* visibe because we now show the header line(s).
@@ -108,17 +122,6 @@ static void hist_browser__refresh_dimensions(struct ui_browser *browser)
* changeset.
*/
ui_browser__refresh_dimensions(browser);
- hist_browser__update_rows(hb);
-}
-
-static void hist_browser__gotorc(struct hist_browser *browser, int row, int column)
-{
- struct hists *hists = browser->hists;
- struct perf_hpp_list *hpp_list = hists->hpp_list;
- u16 header_offset;
-
- header_offset = browser->show_headers ? hpp_list->nr_header_lines : 0;
- ui_browser__gotorc(&browser->b, row + header_offset, column);
}
static void hist_browser__reset(struct hist_browser *browser)
@@ -656,9 +659,10 @@ int hist_browser__run(struct hist_browser *browser, const char *help,
struct hist_entry *h = rb_entry(browser->b.top,
struct hist_entry, rb_node);
ui_helpline__pop();
- ui_helpline__fpush("%d: nr_ent=(%d,%d), rows=%d, idx=%d, fve: idx=%d, row_off=%d, nrows=%d",
+ ui_helpline__fpush("%d: nr_ent=(%d,%d), etl: %d, rows=%d, idx=%d, fve: idx=%d, row_off=%d, nrows=%d",
seq++, browser->b.nr_entries,
browser->hists->nr_entries,
+ browser->b.extra_title_lines,
browser->b.rows,
browser->b.index,
browser->b.top_idx,
@@ -733,7 +737,7 @@ static void hist_browser__show_callchain_entry(struct hist_browser *browser,
}
ui_browser__set_color(&browser->b, color);
- hist_browser__gotorc(browser, row, 0);
+ ui_browser__gotorc(&browser->b, row, 0);
ui_browser__write_nstring(&browser->b, " ", offset);
ui_browser__printf(&browser->b, "%c", folded_sign);
ui_browser__write_graph(&browser->b, show_annotated ? SLSMG_RARROW_CHAR : ' ');
@@ -1249,7 +1253,7 @@ static int hist_browser__show_entry(struct hist_browser *browser,
};
int column = 0;
- hist_browser__gotorc(browser, row, 0);
+ ui_browser__gotorc(&browser->b, row, 0);
hists__for_each_format(browser->hists, fmt) {
char s[2048];
@@ -1358,7 +1362,7 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser,
goto show_callchain;
}
- hist_browser__gotorc(browser, row, 0);
+ ui_browser__gotorc(&browser->b, row, 0);
if (current_entry && browser->b.navkeypressed)
ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED);
@@ -1507,7 +1511,7 @@ static int hist_browser__show_no_entry(struct hist_browser *browser,
browser->selection = NULL;
}
- hist_browser__gotorc(browser, row, 0);
+ ui_browser__gotorc(&browser->b, row, 0);
if (current_entry && browser->b.navkeypressed)
ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED);
@@ -1713,7 +1717,7 @@ static void hists_browser__headers(struct hist_browser *browser)
hists_browser__scnprintf_headers(browser, headers,
sizeof(headers), line);
- ui_browser__gotorc(&browser->b, line, 0);
+ ui_browser__gotorc_title(&browser->b, line, 0);
ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
}
@@ -1740,17 +1744,11 @@ static void ui_browser__hists_init_top(struct ui_browser *browser)
static unsigned int hist_browser__refresh(struct ui_browser *browser)
{
unsigned row = 0;
- u16 header_offset = 0;
struct rb_node *nd;
struct hist_browser *hb = container_of(browser, struct hist_browser, b);
- struct hists *hists = hb->hists;
-
- if (hb->show_headers) {
- struct perf_hpp_list *hpp_list = hists->hpp_list;
+ if (hb->show_headers)
hist_browser__show_headers(hb);
- header_offset = hpp_list->nr_header_lines;
- }
ui_browser__hists_init_top(browser);
hb->he_selection = NULL;
@@ -1788,7 +1786,7 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
break;
}
- return row + header_offset;
+ return row;
}
static struct rb_node *hists__filter_entries(struct rb_node *nd,
@@ -2143,6 +2141,7 @@ void hist_browser__init(struct hist_browser *browser,
browser->b.seek = ui_browser__hists_seek;
browser->b.use_navkeypressed = true;
browser->show_headers = symbol_conf.show_hist_headers;
+ hist_browser__set_title_space(browser);
if (symbol_conf.report_hierarchy) {
struct perf_hpp_list_node *fmt_node;
@@ -2183,7 +2182,7 @@ perf_evsel_browser__new(struct perf_evsel *evsel,
if (browser) {
browser->hbt = hbt;
browser->env = env;
- browser->title = perf_evsel_browser_title;
+ browser->title = hists_browser__scnprintf_title;
}
return browser;
}
@@ -2209,84 +2208,11 @@ static inline bool is_report_browser(void *timer)
return timer == NULL;
}
-static int perf_evsel_browser_title(struct hist_browser *browser,
- char *bf, size_t size)
+static int hists_browser__scnprintf_title(struct hist_browser *browser, char *bf, size_t size)
{
struct hist_browser_timer *hbt = browser->hbt;
- struct hists *hists = browser->hists;
- char unit;
- int printed;
- const struct dso *dso = hists->dso_filter;
- const struct thread *thread = hists->thread_filter;
- int socket_id = hists->socket_filter;
- unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
- u64 nr_events = hists->stats.total_period;
- struct perf_evsel *evsel = hists_to_evsel(hists);
- const char *ev_name = perf_evsel__name(evsel);
- char buf[512], sample_freq_str[64] = "";
- size_t buflen = sizeof(buf);
- char ref[30] = " show reference callgraph, ";
- bool enable_ref = false;
+ int printed = __hists__scnprintf_title(browser->hists, bf, size, !is_report_browser(hbt));
- if (symbol_conf.filter_relative) {
- nr_samples = hists->stats.nr_non_filtered_samples;
- nr_events = hists->stats.total_non_filtered_period;
- }
-
- if (perf_evsel__is_group_event(evsel)) {
- struct perf_evsel *pos;
-
- perf_evsel__group_desc(evsel, buf, buflen);
- ev_name = buf;
-
- for_each_group_member(pos, evsel) {
- struct hists *pos_hists = evsel__hists(pos);
-
- if (symbol_conf.filter_relative) {
- nr_samples += pos_hists->stats.nr_non_filtered_samples;
- nr_events += pos_hists->stats.total_non_filtered_period;
- } else {
- nr_samples += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE];
- nr_events += pos_hists->stats.total_period;
- }
- }
- }
-
- if (symbol_conf.show_ref_callgraph &&
- strstr(ev_name, "call-graph=no"))
- enable_ref = true;
-
- if (!is_report_browser(hbt))
- scnprintf(sample_freq_str, sizeof(sample_freq_str), " %d Hz,", evsel->attr.sample_freq);
-
- nr_samples = convert_unit(nr_samples, &unit);
- printed = scnprintf(bf, size,
- "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64,
- nr_samples, unit, evsel->nr_members > 1 ? "s" : "",
- ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
-
-
- if (hists->uid_filter_str)
- printed += snprintf(bf + printed, size - printed,
- ", UID: %s", hists->uid_filter_str);
- if (thread) {
- if (hists__has(hists, thread)) {
- printed += scnprintf(bf + printed, size - printed,
- ", Thread: %s(%d)",
- (thread->comm_set ? thread__comm_str(thread) : ""),
- thread->tid);
- } else {
- printed += scnprintf(bf + printed, size - printed,
- ", Thread: %s",
- (thread->comm_set ? thread__comm_str(thread) : ""));
- }
- }
- if (dso)
- printed += scnprintf(bf + printed, size - printed,
- ", DSO: %s", dso->short_name);
- if (socket_id > -1)
- printed += scnprintf(bf + printed, size - printed,
- ", Processor Socket: %d", socket_id);
if (!is_report_browser(hbt)) {
struct perf_top *top = hbt->arg;
@@ -2788,7 +2714,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
"h/?/F1 Show this window\n" \
"UP/DOWN/PGUP\n" \
"PGDN/SPACE Navigate\n" \
- "q/ESC/CTRL+C Exit browser\n\n" \
+ "q/ESC/CTRL+C Exit browser or go back to previous screen\n\n" \
"For multiple event sessions:\n\n" \
"TAB/UNTAB Switch events\n\n" \
"For symbolic views (--sort has sym):\n\n" \
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 3a428d7c59b9..536ee148bff8 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -17,6 +17,7 @@
#include "config.h"
#include "cache.h"
#include "symbol.h"
+#include "units.h"
#include "debug.h"
#include "annotate.h"
#include "evsel.h"
@@ -45,6 +46,7 @@
struct annotation_options annotation__default_options = {
.use_offset = true,
.jump_arrows = true,
+ .offset_level = ANNOTATION__OFFSET_JUMP_TARGETS,
};
const char *disassembler_style;
@@ -2324,7 +2326,7 @@ int symbol__tty_annotate2(struct symbol *sym, struct map *map,
struct dso *dso = map->dso;
struct rb_root source_line = RB_ROOT;
struct annotation_options opts = annotation__default_options;
- const char *ev_name = perf_evsel__name(evsel);
+ struct annotation *notes = symbol__annotation(sym);
char buf[1024];
if (symbol__annotate2(sym, map, evsel, &opts, NULL) < 0)
@@ -2336,12 +2338,8 @@ int symbol__tty_annotate2(struct symbol *sym, struct map *map,
print_summary(&source_line, dso->long_name);
}
- if (perf_evsel__is_group_event(evsel)) {
- perf_evsel__group_desc(evsel, buf, sizeof(buf));
- ev_name = buf;
- }
-
- fprintf(stdout, "%s() %s\nEvent: %s\n\n", sym->name, dso->long_name, ev_name);
+ annotation__scnprintf_samples_period(notes, buf, sizeof(buf), evsel);
+ fprintf(stdout, "%s\n%s() %s\n", buf, sym->name, dso->long_name);
symbol__annotate_fprintf2(sym, stdout);
annotated_source__purge(symbol__annotation(sym)->src);
@@ -2515,7 +2513,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
if (!notes->options->use_offset) {
printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr);
} else {
- if (al->jump_sources) {
+ if (al->jump_sources &&
+ notes->options->offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) {
if (notes->options->show_nr_jumps) {
int prev;
printed = scnprintf(bf, sizeof(bf), "%*d ",
@@ -2526,9 +2525,14 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
obj__printf(obj, bf);
obj__set_color(obj, prev);
}
-
+print_addr:
printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ",
notes->widths.target, addr);
+ } else if (ins__is_call(&disasm_line(al)->ins) &&
+ notes->options->offset_level >= ANNOTATION__OFFSET_CALL) {
+ goto print_addr;
+ } else if (notes->options->offset_level == ANNOTATION__MAX_OFFSET_LEVEL) {
+ goto print_addr;
} else {
printed = scnprintf(bf, sizeof(bf), "%-*s ",
notes->widths.addr, " ");
@@ -2597,6 +2601,46 @@ out_free_offsets:
return -1;
}
+int __annotation__scnprintf_samples_period(struct annotation *notes,
+ char *bf, size_t size,
+ struct perf_evsel *evsel,
+ bool show_freq)
+{
+ const char *ev_name = perf_evsel__name(evsel);
+ char buf[1024], ref[30] = " show reference callgraph, ";
+ char sample_freq_str[64] = "";
+ unsigned long nr_samples = 0;
+ int nr_members = 1;
+ bool enable_ref = false;
+ u64 nr_events = 0;
+ char unit;
+ int i;
+
+ if (perf_evsel__is_group_event(evsel)) {
+ perf_evsel__group_desc(evsel, buf, sizeof(buf));
+ ev_name = buf;
+ nr_members = evsel->nr_members;
+ }
+
+ for (i = 0; i < nr_members; i++) {
+ struct sym_hist *ah = annotation__histogram(notes, evsel->idx + i);
+
+ nr_samples += ah->nr_samples;
+ nr_events += ah->period;
+ }
+
+ if (symbol_conf.show_ref_callgraph && strstr(ev_name, "call-graph=no"))
+ enable_ref = true;
+
+ if (show_freq)
+ scnprintf(sample_freq_str, sizeof(sample_freq_str), " %d Hz,", evsel->attr.sample_freq);
+
+ nr_samples = convert_unit(nr_samples, &unit);
+ return scnprintf(bf, size, "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64,
+ nr_samples, unit, evsel->nr_members > 1 ? "s" : "",
+ ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
+}
+
#define ANNOTATION__CFG(n) \
{ .name = #n, .value = &annotation__default_options.n, }
@@ -2605,10 +2649,11 @@ out_free_offsets:
*/
static struct annotation_config {
const char *name;
- bool *value;
+ void *value;
} annotation__configs[] = {
ANNOTATION__CFG(hide_src_code),
ANNOTATION__CFG(jump_arrows),
+ ANNOTATION__CFG(offset_level),
ANNOTATION__CFG(show_linenr),
ANNOTATION__CFG(show_nr_jumps),
ANNOTATION__CFG(show_nr_samples),
@@ -2640,8 +2685,16 @@ static int annotation__config(const char *var, const char *value,
if (cfg == NULL)
pr_debug("%s variable unknown, ignoring...", var);
- else
- *cfg->value = perf_config_bool(name, value);
+ else if (strcmp(var, "annotate.offset_level") == 0) {
+ perf_config_int(cfg->value, name, value);
+
+ if (*(int *)cfg->value > ANNOTATION__MAX_OFFSET_LEVEL)
+ *(int *)cfg->value = ANNOTATION__MAX_OFFSET_LEVEL;
+ else if (*(int *)cfg->value < ANNOTATION__MIN_OFFSET_LEVEL)
+ *(int *)cfg->value = ANNOTATION__MIN_OFFSET_LEVEL;
+ } else {
+ *(bool *)cfg->value = perf_config_bool(name, value);
+ }
return 0;
}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index ff7e3df31efa..f28a9e43421d 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -70,8 +70,17 @@ struct annotation_options {
show_nr_jumps,
show_nr_samples,
show_total_period;
+ u8 offset_level;
};
+enum {
+ ANNOTATION__OFFSET_JUMP_TARGETS = 1,
+ ANNOTATION__OFFSET_CALL,
+ ANNOTATION__MAX_OFFSET_LEVEL,
+};
+
+#define ANNOTATION__MIN_OFFSET_LEVEL ANNOTATION__OFFSET_JUMP_TARGETS
+
extern struct annotation_options annotation__default_options;
struct annotation;
@@ -151,6 +160,18 @@ double annotation_line__max_percent(struct annotation_line *al, struct annotatio
void annotation_line__write(struct annotation_line *al, struct annotation *notes,
struct annotation_write_ops *ops);
+int __annotation__scnprintf_samples_period(struct annotation *notes,
+ char *bf, size_t size,
+ struct perf_evsel *evsel,
+ bool show_freq);
+
+static inline int annotation__scnprintf_samples_period(struct annotation *notes,
+ char *bf, size_t size,
+ struct perf_evsel *evsel)
+{
+ return __annotation__scnprintf_samples_period(notes, bf, size, evsel, true);
+}
+
int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw);
size_t disasm__fprintf(struct list_head *head, FILE *fp);
void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel);
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index fb357a00dd86..857de69a5361 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -302,13 +302,27 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
return 0;
}
+static bool filter_cpu(struct perf_session *session, int cpu)
+{
+ unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap;
+
+ return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap);
+}
+
static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
struct perf_session *session,
unsigned int idx,
struct auxtrace_buffer *buffer,
struct auxtrace_buffer **buffer_ptr)
{
- int err;
+ int err = -ENOMEM;
+
+ if (filter_cpu(session, buffer->cpu))
+ return 0;
+
+ buffer = memdup(buffer, sizeof(*buffer));
+ if (!buffer)
+ return -ENOMEM;
if (session->one_mmap) {
buffer->data = buffer->data_offset - session->one_mmap_offset +
@@ -316,31 +330,28 @@ static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
} else if (perf_data__is_pipe(session->data)) {
buffer->data = auxtrace_copy_data(buffer->size, session);
if (!buffer->data)
- return -ENOMEM;
+ goto out_free;
buffer->data_needs_freeing = true;
} else if (BITS_PER_LONG == 32 &&
buffer->size > BUFFER_LIMIT_FOR_32_BIT) {
err = auxtrace_queues__split_buffer(queues, idx, buffer);
if (err)
- return err;
+ goto out_free;
}
err = auxtrace_queues__queue_buffer(queues, idx, buffer);
if (err)
- return err;
+ goto out_free;
/* FIXME: Doesn't work for split buffer */
if (buffer_ptr)
*buffer_ptr = buffer;
return 0;
-}
-static bool filter_cpu(struct perf_session *session, int cpu)
-{
- unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap;
-
- return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap);
+out_free:
+ auxtrace_buffer__free(buffer);
+ return err;
}
int auxtrace_queues__add_event(struct auxtrace_queues *queues,
@@ -348,36 +359,19 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues,
union perf_event *event, off_t data_offset,
struct auxtrace_buffer **buffer_ptr)
{
- struct auxtrace_buffer *buffer;
- unsigned int idx;
- int err;
-
- if (filter_cpu(session, event->auxtrace.cpu))
- return 0;
-
- buffer = zalloc(sizeof(struct auxtrace_buffer));
- if (!buffer)
- return -ENOMEM;
-
- buffer->pid = -1;
- buffer->tid = event->auxtrace.tid;
- buffer->cpu = event->auxtrace.cpu;
- buffer->data_offset = data_offset;
- buffer->offset = event->auxtrace.offset;
- buffer->reference = event->auxtrace.reference;
- buffer->size = event->auxtrace.size;
- idx = event->auxtrace.idx;
-
- err = auxtrace_queues__add_buffer(queues, session, idx, buffer,
- buffer_ptr);
- if (err)
- goto out_err;
-
- return 0;
+ struct auxtrace_buffer buffer = {
+ .pid = -1,
+ .tid = event->auxtrace.tid,
+ .cpu = event->auxtrace.cpu,
+ .data_offset = data_offset,
+ .offset = event->auxtrace.offset,
+ .reference = event->auxtrace.reference,
+ .size = event->auxtrace.size,
+ };
+ unsigned int idx = event->auxtrace.idx;
-out_err:
- auxtrace_buffer__free(buffer);
- return err;
+ return auxtrace_queues__add_buffer(queues, session, idx, &buffer,
+ buffer_ptr);
}
static int auxtrace_queues__add_indexed_event(struct auxtrace_queues *queues,
diff --git a/tools/perf/util/c++/clang-test.cpp b/tools/perf/util/c++/clang-test.cpp
index a4014d786676..7b042a5ebc68 100644
--- a/tools/perf/util/c++/clang-test.cpp
+++ b/tools/perf/util/c++/clang-test.cpp
@@ -41,7 +41,7 @@ int test__clang_to_IR(void)
if (!M)
return -1;
for (llvm::Function& F : *M)
- if (F.getName() == "bpf_func__SyS_epoll_wait")
+ if (F.getName() == "bpf_func__SyS_epoll_pwait")
return 0;
return -1;
}
diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp
index 1bfc946e37dc..bf31ceab33bd 100644
--- a/tools/perf/util/c++/clang.cpp
+++ b/tools/perf/util/c++/clang.cpp
@@ -9,6 +9,7 @@
* Copyright (C) 2016 Huawei Inc.
*/
+#include "clang/Basic/Version.h"
#include "clang/CodeGen/CodeGenAction.h"
#include "clang/Frontend/CompilerInvocation.h"
#include "clang/Frontend/CompilerInstance.h"
@@ -58,7 +59,8 @@ createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path,
FrontendOptions& Opts = CI->getFrontendOpts();
Opts.Inputs.clear();
- Opts.Inputs.emplace_back(Path, IK_C);
+ Opts.Inputs.emplace_back(Path,
+ FrontendOptions::getInputKindForExtension("c"));
return CI;
}
@@ -71,10 +73,17 @@ getModuleFromSource(llvm::opt::ArgStringList CFlags,
Clang.setVirtualFileSystem(&*VFS);
+#if CLANG_VERSION_MAJOR < 4
IntrusiveRefCntPtr<CompilerInvocation> CI =
createCompilerInvocation(std::move(CFlags), Path,
Clang.getDiagnostics());
Clang.setInvocation(&*CI);
+#else
+ std::shared_ptr<CompilerInvocation> CI(
+ createCompilerInvocation(std::move(CFlags), Path,
+ Clang.getDiagnostics()));
+ Clang.setInvocation(CI);
+#endif
std::unique_ptr<CodeGenAction> Act(new EmitLLVMOnlyAction(&*LLVMCtx));
if (!Clang.ExecuteAction(*Act))
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 640af88331b4..c8b98fa22997 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * SPDX-License-Identifier: GPL-2.0
- *
* Copyright(C) 2015-2018 Linaro Limited.
*
* Author: Tor Jeremiassen <tor@ti.com>
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 1b0d422373be..40020b1ca54f 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * SPDX-License-Identifier: GPL-2.0
- *
* Copyright(C) 2015-2018 Linaro Limited.
*
* Author: Tor Jeremiassen <tor@ti.com>
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 5864d5dca616..37f8d48179ca 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -1,18 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright(C) 2015 Linaro Limited. All rights reserved.
* Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDE__UTIL_PERF_CS_ETM_H__
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index f5acda13dcfa..7eb7de5aee44 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -979,7 +979,7 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
}
-#ifdef HAVE_DWARF_GETLOCATIONS
+#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT
/**
* die_get_var_innermost_scope - Get innermost scope range of given variable DIE
* @sp_die: a subprogram DIE
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index f0a6cbd033cc..98ff3a6a3d50 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1421,7 +1421,9 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp)
size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp)
{
bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
- const char *in_out = out ? "OUT" : "IN ";
+ const char *in_out = !out ? "IN " :
+ !(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT) ?
+ "OUT " : "OUT preempt";
if (event->header.type == PERF_RECORD_SWITCH)
return fprintf(fp, " %s\n", in_out);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1ac8d9236efd..4cd2cf93f726 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -930,8 +930,11 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
* than leader in case leader 'leads' the sampling.
*/
if ((leader != evsel) && leader->sample_read) {
- attr->sample_freq = 0;
- attr->sample_period = 0;
+ attr->freq = 0;
+ attr->sample_freq = 0;
+ attr->sample_period = 0;
+ attr->write_backward = 0;
+ attr->sample_id_all = 0;
}
if (opts->no_samples)
@@ -1922,7 +1925,8 @@ try_fallback:
goto fallback_missing_features;
} else if (!perf_missing_features.group_read &&
evsel->attr.inherit &&
- (evsel->attr.read_format & PERF_FORMAT_GROUP)) {
+ (evsel->attr.read_format & PERF_FORMAT_GROUP) &&
+ perf_evsel__is_group_leader(evsel)) {
perf_missing_features.group_read = true;
pr_debug2("switching off group read\n");
goto fallback_missing_features;
@@ -2754,8 +2758,14 @@ bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
(paranoid = perf_event_paranoid()) > 1) {
const char *name = perf_evsel__name(evsel);
char *new_name;
+ const char *sep = ":";
- if (asprintf(&new_name, "%s%su", name, strchr(name, ':') ? "" : ":") < 0)
+ /* Is there already the separator in the name. */
+ if (strchr(name, '/') ||
+ strchr(name, ':'))
+ sep = "";
+
+ if (asprintf(&new_name, "%s%su", name, sep) < 0)
return false;
if (evsel->name)
@@ -2870,8 +2880,7 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
#if defined(__i386__) || defined(__x86_64__)
if (evsel->attr.type == PERF_TYPE_HARDWARE)
return scnprintf(msg, size, "%s",
- "No hardware sampling interrupt available.\n"
- "No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.");
+ "No hardware sampling interrupt available.\n");
#endif
break;
case EBUSY:
@@ -2894,8 +2903,7 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
return scnprintf(msg, size,
"The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
- "/bin/dmesg may provide additional information.\n"
- "No CONFIG_PERF_EVENTS=y kernel support configured?",
+ "/bin/dmesg | grep -i perf may provide additional information.\n",
err, str_error_r(err, sbuf, sizeof(sbuf)),
perf_evsel__name(evsel));
}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index d3ee3af618ef..92ec009a292d 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -115,6 +115,7 @@ struct perf_evsel {
unsigned int sample_size;
int id_pos;
int is_pos;
+ bool uniquified_name;
bool snapshot;
bool supported;
bool needs_swap;
diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh
index ff17920a5ebc..c3cef36d4176 100755
--- a/tools/perf/util/generate-cmdlist.sh
+++ b/tools/perf/util/generate-cmdlist.sh
@@ -38,7 +38,7 @@ do
done
echo "#endif /* HAVE_LIBELF_SUPPORT */"
-echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE)"
+echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)"
sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt |
sort |
while read cmd
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 121df1683c36..a8bff2178fbc 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1320,7 +1320,8 @@ static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp)
dir = opendir(path);
if (!dir) {
- pr_warning("failed: can't open node sysfs data\n");
+ pr_debug2("%s: could't read %s, does this arch have topology information?\n",
+ __func__, path);
return -1;
}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 7d968892ee39..4d602fba40b2 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -6,6 +6,7 @@
#include "session.h"
#include "namespaces.h"
#include "sort.h"
+#include "units.h"
#include "evlist.h"
#include "evsel.h"
#include "annotate.h"
@@ -14,6 +15,7 @@
#include "ui/progress.h"
#include <errno.h>
#include <math.h>
+#include <inttypes.h>
#include <sys/param.h>
static bool hists__filter_entry_by_dso(struct hists *hists,
@@ -2454,6 +2456,85 @@ u64 hists__total_period(struct hists *hists)
hists->stats.total_period;
}
+int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq)
+{
+ char unit;
+ int printed;
+ const struct dso *dso = hists->dso_filter;
+ const struct thread *thread = hists->thread_filter;
+ int socket_id = hists->socket_filter;
+ unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
+ u64 nr_events = hists->stats.total_period;
+ struct perf_evsel *evsel = hists_to_evsel(hists);
+ const char *ev_name = perf_evsel__name(evsel);
+ char buf[512], sample_freq_str[64] = "";
+ size_t buflen = sizeof(buf);
+ char ref[30] = " show reference callgraph, ";
+ bool enable_ref = false;
+
+ if (symbol_conf.filter_relative) {
+ nr_samples = hists->stats.nr_non_filtered_samples;
+ nr_events = hists->stats.total_non_filtered_period;
+ }
+
+ if (perf_evsel__is_group_event(evsel)) {
+ struct perf_evsel *pos;
+
+ perf_evsel__group_desc(evsel, buf, buflen);
+ ev_name = buf;
+
+ for_each_group_member(pos, evsel) {
+ struct hists *pos_hists = evsel__hists(pos);
+
+ if (symbol_conf.filter_relative) {
+ nr_samples += pos_hists->stats.nr_non_filtered_samples;
+ nr_events += pos_hists->stats.total_non_filtered_period;
+ } else {
+ nr_samples += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE];
+ nr_events += pos_hists->stats.total_period;
+ }
+ }
+ }
+
+ if (symbol_conf.show_ref_callgraph &&
+ strstr(ev_name, "call-graph=no"))
+ enable_ref = true;
+
+ if (show_freq)
+ scnprintf(sample_freq_str, sizeof(sample_freq_str), " %d Hz,", evsel->attr.sample_freq);
+
+ nr_samples = convert_unit(nr_samples, &unit);
+ printed = scnprintf(bf, size,
+ "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64,
+ nr_samples, unit, evsel->nr_members > 1 ? "s" : "",
+ ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
+
+
+ if (hists->uid_filter_str)
+ printed += snprintf(bf + printed, size - printed,
+ ", UID: %s", hists->uid_filter_str);
+ if (thread) {
+ if (hists__has(hists, thread)) {
+ printed += scnprintf(bf + printed, size - printed,
+ ", Thread: %s(%d)",
+ (thread->comm_set ? thread__comm_str(thread) : ""),
+ thread->tid);
+ } else {
+ printed += scnprintf(bf + printed, size - printed,
+ ", Thread: %s",
+ (thread->comm_set ? thread__comm_str(thread) : ""));
+ }
+ }
+ if (dso)
+ printed += scnprintf(bf + printed, size - printed,
+ ", DSO: %s", dso->short_name);
+ if (socket_id > -1)
+ printed += scnprintf(bf + printed, size - printed,
+ ", Processor Socket: %d", socket_id);
+
+ return printed;
+}
+
int parse_filter_percentage(const struct option *opt __maybe_unused,
const char *arg, int unset __maybe_unused)
{
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index e869cad4d89f..fbabfd8a215d 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -61,6 +61,7 @@ enum hist_column {
HISTC_SRCLINE_TO,
HISTC_TRACE,
HISTC_SYM_SIZE,
+ HISTC_DSO_SIZE,
HISTC_NR_COLS, /* Last entry */
};
@@ -503,5 +504,11 @@ int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...);
int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
struct perf_hpp_list *hpp_list);
int hists__fprintf_headers(struct hists *hists, FILE *fp);
+int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq);
+
+static inline int hists__scnprintf_title(struct hists *hists, char *bf, size_t size)
+{
+ return __hists__scnprintf_title(hists, bf, size, true);
+}
#endif /* __PERF_HIST_H */
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 2eca8478e24f..32d50492505d 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1019,13 +1019,6 @@ int machine__load_vmlinux_path(struct machine *machine, enum map_type type)
return ret;
}
-static void map_groups__fixup_end(struct map_groups *mg)
-{
- int i;
- for (i = 0; i < MAP__NR_TYPES; ++i)
- __map_groups__fixup_end(mg, i);
-}
-
static char *get_kernel_version(const char *root_dir)
{
char version[PATH_MAX];
@@ -1233,6 +1226,7 @@ int machine__create_kernel_maps(struct machine *machine)
{
struct dso *kernel = machine__get_kernel(machine);
const char *name = NULL;
+ struct map *map;
u64 addr = 0;
int ret;
@@ -1259,13 +1253,25 @@ int machine__create_kernel_maps(struct machine *machine)
machine__destroy_kernel_maps(machine);
return -1;
}
- machine__set_kernel_mmap(machine, addr, 0);
+
+ /* we have a real start address now, so re-order the kmaps */
+ map = machine__kernel_map(machine);
+
+ map__get(map);
+ map_groups__remove(&machine->kmaps, map);
+
+ /* assume it's the last in the kmaps */
+ machine__set_kernel_mmap(machine, addr, ~0ULL);
+
+ map_groups__insert(&machine->kmaps, map);
+ map__put(map);
}
- /*
- * Now that we have all the maps created, just set the ->end of them:
- */
- map_groups__fixup_end(&machine->kmaps);
+ /* update end address of the kernel map using adjacent module address */
+ map = map__next(machine__kernel_map(machine));
+ if (map)
+ machine__set_kernel_mmap(machine, addr, map->start);
+
return 0;
}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index edeb7291c8e1..0e9bbe01b0ab 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -103,6 +103,10 @@ static inline u64 identity__map_ip(struct map *map __maybe_unused, u64 ip)
return ip;
}
+static inline size_t map__size(const struct map *map)
+{
+ return map->end - map->start;
+}
/* rip/ip <-> addr suitable for passing to `objdump --start-address=` */
u64 map__rip_2objdump(struct map *map, u64 rip);
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 7afeb80cc39e..d14464c42714 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -224,15 +224,15 @@ event_def: event_pmu |
event_bpf_file
event_pmu:
-PE_NAME opt_event_config
+PE_NAME '/' event_config '/'
{
struct list_head *list, *orig_terms, *terms;
- if (parse_events_copy_term_list($2, &orig_terms))
+ if (parse_events_copy_term_list($3, &orig_terms))
YYABORT;
ALLOC_LIST(list);
- if (parse_events_add_pmu(_parse_state, list, $1, $2, false)) {
+ if (parse_events_add_pmu(_parse_state, list, $1, $3, false)) {
struct perf_pmu *pmu = NULL;
int ok = 0;
char *pattern;
@@ -262,7 +262,7 @@ PE_NAME opt_event_config
if (!ok)
YYABORT;
}
- parse_events_terms__delete($2);
+ parse_events_terms__delete($3);
parse_events_terms__delete(orig_terms);
$$ = list;
}
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 064bdcb7bd78..d2fb597c9a8c 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -539,9 +539,10 @@ static bool pmu_is_uncore(const char *name)
/*
* PMU CORE devices have different name other than cpu in sysfs on some
- * platforms. looking for possible sysfs files to identify as core device.
+ * platforms.
+ * Looking for possible sysfs files to identify the arm core device.
*/
-static int is_pmu_core(const char *name)
+static int is_arm_pmu_core(const char *name)
{
struct stat st;
char path[PATH_MAX];
@@ -550,18 +551,18 @@ static int is_pmu_core(const char *name)
if (!sysfs)
return 0;
- /* Look for cpu sysfs (x86 and others) */
- scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu", sysfs);
- if ((stat(path, &st) == 0) &&
- (strncmp(name, "cpu", strlen("cpu")) == 0))
- return 1;
-
/* Look for cpu sysfs (specific to arm) */
scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus",
sysfs, name);
if (stat(path, &st) == 0)
return 1;
+ /* Look for cpu sysfs (specific to s390) */
+ scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s",
+ sysfs, name);
+ if (stat(path, &st) == 0 && !strncmp(name, "cpum_", 5))
+ return 1;
+
return 0;
}
@@ -580,7 +581,7 @@ char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
* cpuid string generated on this platform.
* Otherwise return non-zero.
*/
-int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
+int strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
{
regex_t re;
regmatch_t pmatch[1];
@@ -662,6 +663,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
struct pmu_events_map *map;
struct pmu_event *pe;
const char *name = pmu->name;
+ const char *pname;
map = perf_pmu__find_map(pmu);
if (!map)
@@ -680,11 +682,9 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
break;
}
- if (!is_pmu_core(name)) {
- /* check for uncore devices */
- if (pe->pmu == NULL)
- continue;
- if (strncmp(pe->pmu, name, strlen(pe->pmu)))
+ if (!is_arm_pmu_core(name)) {
+ pname = pe->pmu ? pe->pmu : "cpu";
+ if (strncmp(pname, name, strlen(pname)))
continue;
}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c71ced7db152..f4a7a437ee87 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1591,7 +1591,7 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
drop_rate = (double)stats->total_lost_samples /
(double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
if (drop_rate > 0.05) {
- ui__warning("Processed %" PRIu64 " samples and lost %3.2f%% samples!\n\n",
+ ui__warning("Processed %" PRIu64 " samples and lost %3.2f%%!\n\n",
stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
drop_rate * 100.0);
}
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index e8514f651865..26a68dfd8a4f 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1545,6 +1545,46 @@ struct sort_entry sort_sym_size = {
.se_width_idx = HISTC_SYM_SIZE,
};
+/* --sort dso_size */
+
+static int64_t _sort__dso_size_cmp(struct map *map_l, struct map *map_r)
+{
+ int64_t size_l = map_l != NULL ? map__size(map_l) : 0;
+ int64_t size_r = map_r != NULL ? map__size(map_r) : 0;
+
+ return size_l < size_r ? -1 :
+ size_l == size_r ? 0 : 1;
+}
+
+static int64_t
+sort__dso_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return _sort__dso_size_cmp(right->ms.map, left->ms.map);
+}
+
+static int _hist_entry__dso_size_snprintf(struct map *map, char *bf,
+ size_t bf_size, unsigned int width)
+{
+ if (map && map->dso)
+ return repsep_snprintf(bf, bf_size, "%*d", width,
+ map__size(map));
+
+ return repsep_snprintf(bf, bf_size, "%*s", width, "unknown");
+}
+
+static int hist_entry__dso_size_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return _hist_entry__dso_size_snprintf(he->ms.map, bf, size, width);
+}
+
+struct sort_entry sort_dso_size = {
+ .se_header = "DSO size",
+ .se_cmp = sort__dso_size_cmp,
+ .se_snprintf = hist_entry__dso_size_snprintf,
+ .se_width_idx = HISTC_DSO_SIZE,
+};
+
struct sort_dimension {
const char *name;
@@ -1569,6 +1609,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_TRANSACTION, "transaction", sort_transaction),
DIM(SORT_TRACE, "trace", sort_trace),
DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size),
+ DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size),
DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id),
};
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index f5901c10a563..035b62e2c60b 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -220,6 +220,7 @@ enum sort_type {
SORT_TRANSACTION,
SORT_TRACE,
SORT_SYM_SIZE,
+ SORT_DSO_SIZE,
SORT_CGROUP_ID,
/* branch stack specific sort keys */
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 62b2dd2253eb..1466814ebada 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -2091,16 +2091,14 @@ static bool symbol__read_kptr_restrict(void)
int symbol__annotation_init(void)
{
+ if (symbol_conf.init_annotation)
+ return 0;
+
if (symbol_conf.initialized) {
pr_err("Annotation needs to be init before symbol__init()\n");
return -1;
}
- if (symbol_conf.init_annotation) {
- pr_warning("Annotation being initialized multiple times\n");
- return 0;
- }
-
symbol_conf.priv_size += sizeof(struct annotation);
symbol_conf.init_annotation = true;
return 0;
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index 895122d638dd..0ee7f568d60c 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -17,7 +17,7 @@
#include <stdlib.h>
#include <linux/compiler.h>
-#ifdef HAVE_SYSCALL_TABLE
+#ifdef HAVE_SYSCALL_TABLE_SUPPORT
#include <string.h>
#include "string2.h"
#include "util.h"
@@ -139,7 +139,7 @@ int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_g
return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
}
-#else /* HAVE_SYSCALL_TABLE */
+#else /* HAVE_SYSCALL_TABLE_SUPPORT */
#include <libaudit.h>
@@ -176,4 +176,4 @@ int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_g
{
return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
}
-#endif /* HAVE_SYSCALL_TABLE */
+#endif /* HAVE_SYSCALL_TABLE_SUPPORT */
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index 0ac9077f62a2..b1e5c3a2b8e3 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -98,7 +98,7 @@ static void register_python_scripting(struct scripting_ops *scripting_ops)
}
}
-#ifdef NO_LIBPYTHON
+#ifndef HAVE_LIBPYTHON_SUPPORT
void setup_python_scripting(void)
{
register_python_scripting(&python_scripting_unsupported_ops);
@@ -161,7 +161,7 @@ static void register_perl_scripting(struct scripting_ops *scripting_ops)
}
}
-#ifdef NO_LIBPERL
+#ifndef HAVE_LIBPERL_SUPPORT
void setup_perl_scripting(void)
{
register_perl_scripting(&perl_scripting_unsupported_ops);
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 9496365da3d7..c9626c206208 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -11,8 +11,7 @@
#include <stdlib.h>
#include <stdarg.h>
#include <linux/compiler.h>
-#include <linux/types.h>
-#include "namespaces.h"
+#include <sys/types.h>
/* General helper functions */
void usage(const char *err) __noreturn;
@@ -26,6 +25,7 @@ static inline void *zalloc(size_t size)
#define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
struct dirent;
+struct nsinfo;
struct strlist;
int mkdir_p(char *path, mode_t mode);
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index dd614463d4d6..495066bafbe3 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -120,3 +120,5 @@ ifneq ($(silent),1)
QUIET_UNINST = @printf ' UNINST %s\n' $1;
endif
endif
+
+pound := \#
diff --git a/tools/testing/ktest/config-bisect.pl b/tools/testing/ktest/config-bisect.pl
new file mode 100755
index 000000000000..b28feea7c363
--- /dev/null
+++ b/tools/testing/ktest/config-bisect.pl
@@ -0,0 +1,770 @@
+#!/usr/bin/perl -w
+#
+# Copyright 2015 - Steven Rostedt, Red Hat Inc.
+# Copyright 2017 - Steven Rostedt, VMware, Inc.
+#
+# Licensed under the terms of the GNU GPL License version 2
+#
+
+# usage:
+# config-bisect.pl [options] good-config bad-config [good|bad]
+#
+
+# Compares a good config to a bad config, then takes half of the diffs
+# and produces a config that is somewhere between the good config and
+# the bad config. That is, the resulting config will start with the
+# good config and will try to make half of the differences of between
+# the good and bad configs match the bad config. It tries because of
+# dependencies between the two configs it may not be able to change
+# exactly half of the configs that are different between the two config
+# files.
+
+# Here's a normal way to use it:
+#
+# $ cd /path/to/linux/kernel
+# $ config-bisect.pl /path/to/good/config /path/to/bad/config
+
+# This will now pull in good config (blowing away .config in that directory
+# so do not make that be one of the good or bad configs), and then
+# build the config with "make oldconfig" to make sure it matches the
+# current kernel. It will then store the configs in that result for
+# the good config. It does the same for the bad config as well.
+# The algorithm will run, merging half of the differences between
+# the two configs and building them with "make oldconfig" to make sure
+# the result changes (dependencies may reset changes the tool had made).
+# It then copies the result of its good config to /path/to/good/config.tmp
+# and the bad config to /path/to/bad/config.tmp (just appends ".tmp" to the
+# files passed in). And the ".config" that you should test will be in
+# directory
+
+# After the first run, determine if the result is good or bad then
+# run the same command appending the result
+
+# For good results:
+# $ config-bisect.pl /path/to/good/config /path/to/bad/config good
+
+# For bad results:
+# $ config-bisect.pl /path/to/good/config /path/to/bad/config bad
+
+# Do not change the good-config or bad-config, config-bisect.pl will
+# copy the good-config to a temp file with the same name as good-config
+# but with a ".tmp" after it. It will do the same with the bad-config.
+
+# If "good" or "bad" is not stated at the end, it will copy the good and
+# bad configs to the .tmp versions. If a .tmp version already exists, it will
+# warn before writing over them (-r will not warn, and just write over them).
+# If the last config is labeled "good", then it will copy it to the good .tmp
+# version. If the last config is labeled "bad", it will copy it to the bad
+# .tmp version. It will continue this until it can not merge the two any more
+# without the result being equal to either the good or bad .tmp configs.
+
+my $start = 0;
+my $val = "";
+
+my $pwd = `pwd`;
+chomp $pwd;
+my $tree = $pwd;
+my $build;
+
+my $output_config;
+my $reset_bisect;
+
+sub usage {
+ print << "EOF"
+
+usage: config-bisect.pl [-l linux-tree][-b build-dir] good-config bad-config [good|bad]
+ -l [optional] define location of linux-tree (default is current directory)
+ -b [optional] define location to build (O=build-dir) (default is linux-tree)
+ good-config the config that is considered good
+ bad-config the config that does not work
+ "good" add this if the last run produced a good config
+ "bad" add this if the last run produced a bad config
+ If "good" or "bad" is not specified, then it is the start of a new bisect
+
+ Note, each run will create copy of good and bad configs with ".tmp" appended.
+
+EOF
+;
+
+ exit(-1);
+}
+
+sub doprint {
+ print @_;
+}
+
+sub dodie {
+ doprint "CRITICAL FAILURE... ", @_, "\n";
+
+ die @_, "\n";
+}
+
+sub expand_path {
+ my ($file) = @_;
+
+ if ($file =~ m,^/,) {
+ return $file;
+ }
+ return "$pwd/$file";
+}
+
+sub read_prompt {
+ my ($cancel, $prompt) = @_;
+
+ my $ans;
+
+ for (;;) {
+ if ($cancel) {
+ print "$prompt [y/n/C] ";
+ } else {
+ print "$prompt [y/N] ";
+ }
+ $ans = <STDIN>;
+ chomp $ans;
+ if ($ans =~ /^\s*$/) {
+ if ($cancel) {
+ $ans = "c";
+ } else {
+ $ans = "n";
+ }
+ }
+ last if ($ans =~ /^y$/i || $ans =~ /^n$/i);
+ if ($cancel) {
+ last if ($ans =~ /^c$/i);
+ print "Please answer either 'y', 'n' or 'c'.\n";
+ } else {
+ print "Please answer either 'y' or 'n'.\n";
+ }
+ }
+ if ($ans =~ /^c/i) {
+ exit;
+ }
+ if ($ans !~ /^y$/i) {
+ return 0;
+ }
+ return 1;
+}
+
+sub read_yn {
+ my ($prompt) = @_;
+
+ return read_prompt 0, $prompt;
+}
+
+sub read_ync {
+ my ($prompt) = @_;
+
+ return read_prompt 1, $prompt;
+}
+
+sub run_command {
+ my ($command, $redirect) = @_;
+ my $start_time;
+ my $end_time;
+ my $dord = 0;
+ my $pid;
+
+ $start_time = time;
+
+ doprint("$command ... ");
+
+ $pid = open(CMD, "$command 2>&1 |") or
+ dodie "unable to exec $command";
+
+ if (defined($redirect)) {
+ open (RD, ">$redirect") or
+ dodie "failed to write to redirect $redirect";
+ $dord = 1;
+ }
+
+ while (<CMD>) {
+ print RD if ($dord);
+ }
+
+ waitpid($pid, 0);
+ my $failed = $?;
+
+ close(CMD);
+ close(RD) if ($dord);
+
+ $end_time = time;
+ my $delta = $end_time - $start_time;
+
+ if ($delta == 1) {
+ doprint "[1 second] ";
+ } else {
+ doprint "[$delta seconds] ";
+ }
+
+ if ($failed) {
+ doprint "FAILED!\n";
+ } else {
+ doprint "SUCCESS\n";
+ }
+
+ return !$failed;
+}
+
+###### CONFIG BISECT ######
+
+# config_ignore holds the configs that were set (or unset) for
+# a good config and we will ignore these configs for the rest
+# of a config bisect. These configs stay as they were.
+my %config_ignore;
+
+# config_set holds what all configs were set as.
+my %config_set;
+
+# config_off holds the set of configs that the bad config had disabled.
+# We need to record them and set them in the .config when running
+# olddefconfig, because olddefconfig keeps the defaults.
+my %config_off;
+
+# config_off_tmp holds a set of configs to turn off for now
+my @config_off_tmp;
+
+# config_list is the set of configs that are being tested
+my %config_list;
+my %null_config;
+
+my %dependency;
+
+my $make;
+
+sub make_oldconfig {
+
+ if (!run_command "$make olddefconfig") {
+ # Perhaps olddefconfig doesn't exist in this version of the kernel
+ # try oldnoconfig
+ doprint "olddefconfig failed, trying make oldnoconfig\n";
+ if (!run_command "$make oldnoconfig") {
+ doprint "oldnoconfig failed, trying yes '' | make oldconfig\n";
+ # try a yes '' | oldconfig
+ run_command "yes '' | $make oldconfig" or
+ dodie "failed make config oldconfig";
+ }
+ }
+}
+
+sub assign_configs {
+ my ($hash, $config) = @_;
+
+ doprint "Reading configs from $config\n";
+
+ open (IN, $config)
+ or dodie "Failed to read $config";
+
+ while (<IN>) {
+ chomp;
+ if (/^((CONFIG\S*)=.*)/) {
+ ${$hash}{$2} = $1;
+ } elsif (/^(# (CONFIG\S*) is not set)/) {
+ ${$hash}{$2} = $1;
+ }
+ }
+
+ close(IN);
+}
+
+sub process_config_ignore {
+ my ($config) = @_;
+
+ assign_configs \%config_ignore, $config;
+}
+
+sub get_dependencies {
+ my ($config) = @_;
+
+ my $arr = $dependency{$config};
+ if (!defined($arr)) {
+ return ();
+ }
+
+ my @deps = @{$arr};
+
+ foreach my $dep (@{$arr}) {
+ print "ADD DEP $dep\n";
+ @deps = (@deps, get_dependencies $dep);
+ }
+
+ return @deps;
+}
+
+sub save_config {
+ my ($pc, $file) = @_;
+
+ my %configs = %{$pc};
+
+ doprint "Saving configs into $file\n";
+
+ open(OUT, ">$file") or dodie "Can not write to $file";
+
+ foreach my $config (keys %configs) {
+ print OUT "$configs{$config}\n";
+ }
+ close(OUT);
+}
+
+sub create_config {
+ my ($name, $pc) = @_;
+
+ doprint "Creating old config from $name configs\n";
+
+ save_config $pc, $output_config;
+
+ make_oldconfig;
+}
+
+# compare two config hashes, and return configs with different vals.
+# It returns B's config values, but you can use A to see what A was.
+sub diff_config_vals {
+ my ($pa, $pb) = @_;
+
+ # crappy Perl way to pass in hashes.
+ my %a = %{$pa};
+ my %b = %{$pb};
+
+ my %ret;
+
+ foreach my $item (keys %a) {
+ if (defined($b{$item}) && $b{$item} ne $a{$item}) {
+ $ret{$item} = $b{$item};
+ }
+ }
+
+ return %ret;
+}
+
+# compare two config hashes and return the configs in B but not A
+sub diff_configs {
+ my ($pa, $pb) = @_;
+
+ my %ret;
+
+ # crappy Perl way to pass in hashes.
+ my %a = %{$pa};
+ my %b = %{$pb};
+
+ foreach my $item (keys %b) {
+ if (!defined($a{$item})) {
+ $ret{$item} = $b{$item};
+ }
+ }
+
+ return %ret;
+}
+
+# return if two configs are equal or not
+# 0 is equal +1 b has something a does not
+# +1 if a and b have a different item.
+# -1 if a has something b does not
+sub compare_configs {
+ my ($pa, $pb) = @_;
+
+ my %ret;
+
+ # crappy Perl way to pass in hashes.
+ my %a = %{$pa};
+ my %b = %{$pb};
+
+ foreach my $item (keys %b) {
+ if (!defined($a{$item})) {
+ return 1;
+ }
+ if ($a{$item} ne $b{$item}) {
+ return 1;
+ }
+ }
+
+ foreach my $item (keys %a) {
+ if (!defined($b{$item})) {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+sub process_failed {
+ my ($config) = @_;
+
+ doprint "\n\n***************************************\n";
+ doprint "Found bad config: $config\n";
+ doprint "***************************************\n\n";
+}
+
+sub process_new_config {
+ my ($tc, $nc, $gc, $bc) = @_;
+
+ my %tmp_config = %{$tc};
+ my %good_configs = %{$gc};
+ my %bad_configs = %{$bc};
+
+ my %new_configs;
+
+ my $runtest = 1;
+ my $ret;
+
+ create_config "tmp_configs", \%tmp_config;
+ assign_configs \%new_configs, $output_config;
+
+ $ret = compare_configs \%new_configs, \%bad_configs;
+ if (!$ret) {
+ doprint "New config equals bad config, try next test\n";
+ $runtest = 0;
+ }
+
+ if ($runtest) {
+ $ret = compare_configs \%new_configs, \%good_configs;
+ if (!$ret) {
+ doprint "New config equals good config, try next test\n";
+ $runtest = 0;
+ }
+ }
+
+ %{$nc} = %new_configs;
+
+ return $runtest;
+}
+
+sub convert_config {
+ my ($config) = @_;
+
+ if ($config =~ /^# (.*) is not set/) {
+ $config = "$1=n";
+ }
+
+ $config =~ s/^CONFIG_//;
+ return $config;
+}
+
+sub print_config {
+ my ($sym, $config) = @_;
+
+ $config = convert_config $config;
+ doprint "$sym$config\n";
+}
+
+sub print_config_compare {
+ my ($good_config, $bad_config) = @_;
+
+ $good_config = convert_config $good_config;
+ $bad_config = convert_config $bad_config;
+
+ my $good_value = $good_config;
+ my $bad_value = $bad_config;
+ $good_value =~ s/(.*)=//;
+ my $config = $1;
+
+ $bad_value =~ s/.*=//;
+
+ doprint " $config $good_value -> $bad_value\n";
+}
+
+# Pass in:
+# $phalf: half of the configs names you want to add
+# $oconfigs: The orginial configs to start with
+# $sconfigs: The source to update $oconfigs with (from $phalf)
+# $which: The name of which half that is updating (top / bottom)
+# $type: The name of the source type (good / bad)
+sub make_half {
+ my ($phalf, $oconfigs, $sconfigs, $which, $type) = @_;
+
+ my @half = @{$phalf};
+ my %orig_configs = %{$oconfigs};
+ my %source_configs = %{$sconfigs};
+
+ my %tmp_config = %orig_configs;
+
+ doprint "Settings bisect with $which half of $type configs:\n";
+ foreach my $item (@half) {
+ doprint "Updating $item to $source_configs{$item}\n";
+ $tmp_config{$item} = $source_configs{$item};
+ }
+
+ return %tmp_config;
+}
+
+sub run_config_bisect {
+ my ($pgood, $pbad) = @_;
+
+ my %good_configs = %{$pgood};
+ my %bad_configs = %{$pbad};
+
+ my %diff_configs = diff_config_vals \%good_configs, \%bad_configs;
+ my %b_configs = diff_configs \%good_configs, \%bad_configs;
+ my %g_configs = diff_configs \%bad_configs, \%good_configs;
+
+ # diff_arr is what is in both good and bad but are different (y->n)
+ my @diff_arr = keys %diff_configs;
+ my $len_diff = $#diff_arr + 1;
+
+ # b_arr is what is in bad but not in good (has depends)
+ my @b_arr = keys %b_configs;
+ my $len_b = $#b_arr + 1;
+
+ # g_arr is what is in good but not in bad
+ my @g_arr = keys %g_configs;
+ my $len_g = $#g_arr + 1;
+
+ my $runtest = 0;
+ my %new_configs;
+ my $ret;
+
+ # Look at the configs that are different between good and bad.
+ # This does not include those that depend on other configs
+ # (configs depending on other configs that are not set would
+ # not show up even as a "# CONFIG_FOO is not set"
+
+
+ doprint "# of configs to check: $len_diff\n";
+ doprint "# of configs showing only in good: $len_g\n";
+ doprint "# of configs showing only in bad: $len_b\n";
+
+ if ($len_diff > 0) {
+ # Now test for different values
+
+ doprint "Configs left to check:\n";
+ doprint " Good Config\t\t\tBad Config\n";
+ doprint " -----------\t\t\t----------\n";
+ foreach my $item (@diff_arr) {
+ doprint " $good_configs{$item}\t$bad_configs{$item}\n";
+ }
+
+ my $half = int($#diff_arr / 2);
+ my @tophalf = @diff_arr[0 .. $half];
+
+ doprint "Set tmp config to be good config with some bad config values\n";
+
+ my %tmp_config = make_half \@tophalf, \%good_configs,
+ \%bad_configs, "top", "bad";
+
+ $runtest = process_new_config \%tmp_config, \%new_configs,
+ \%good_configs, \%bad_configs;
+
+ if (!$runtest) {
+ doprint "Set tmp config to be bad config with some good config values\n";
+
+ my %tmp_config = make_half \@tophalf, \%bad_configs,
+ \%good_configs, "top", "good";
+
+ $runtest = process_new_config \%tmp_config, \%new_configs,
+ \%good_configs, \%bad_configs;
+ }
+ }
+
+ if (!$runtest && $len_diff > 0) {
+ # do the same thing, but this time with bottom half
+
+ my $half = int($#diff_arr / 2);
+ my @bottomhalf = @diff_arr[$half+1 .. $#diff_arr];
+
+ doprint "Set tmp config to be good config with some bad config values\n";
+
+ my %tmp_config = make_half \@bottomhalf, \%good_configs,
+ \%bad_configs, "bottom", "bad";
+
+ $runtest = process_new_config \%tmp_config, \%new_configs,
+ \%good_configs, \%bad_configs;
+
+ if (!$runtest) {
+ doprint "Set tmp config to be bad config with some good config values\n";
+
+ my %tmp_config = make_half \@bottomhalf, \%bad_configs,
+ \%good_configs, "bottom", "good";
+
+ $runtest = process_new_config \%tmp_config, \%new_configs,
+ \%good_configs, \%bad_configs;
+ }
+ }
+
+ if ($runtest) {
+ make_oldconfig;
+ doprint "READY TO TEST .config IN $build\n";
+ return 0;
+ }
+
+ doprint "\n%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n";
+ doprint "Hmm, can't make any more changes without making good == bad?\n";
+ doprint "Difference between good (+) and bad (-)\n";
+
+ foreach my $item (keys %bad_configs) {
+ if (!defined($good_configs{$item})) {
+ print_config "-", $bad_configs{$item};
+ }
+ }
+
+ foreach my $item (keys %good_configs) {
+ next if (!defined($bad_configs{$item}));
+ if ($good_configs{$item} ne $bad_configs{$item}) {
+ print_config_compare $good_configs{$item}, $bad_configs{$item};
+ }
+ }
+
+ foreach my $item (keys %good_configs) {
+ if (!defined($bad_configs{$item})) {
+ print_config "+", $good_configs{$item};
+ }
+ }
+ return -1;
+}
+
+sub config_bisect {
+ my ($good_config, $bad_config) = @_;
+ my $ret;
+
+ my %good_configs;
+ my %bad_configs;
+ my %tmp_configs;
+
+ doprint "Run good configs through make oldconfig\n";
+ assign_configs \%tmp_configs, $good_config;
+ create_config "$good_config", \%tmp_configs;
+ assign_configs \%good_configs, $output_config;
+
+ doprint "Run bad configs through make oldconfig\n";
+ assign_configs \%tmp_configs, $bad_config;
+ create_config "$bad_config", \%tmp_configs;
+ assign_configs \%bad_configs, $output_config;
+
+ save_config \%good_configs, $good_config;
+ save_config \%bad_configs, $bad_config;
+
+ return run_config_bisect \%good_configs, \%bad_configs;
+}
+
+while ($#ARGV >= 0) {
+ if ($ARGV[0] !~ m/^-/) {
+ last;
+ }
+ my $opt = shift @ARGV;
+
+ if ($opt eq "-b") {
+ $val = shift @ARGV;
+ if (!defined($val)) {
+ die "-b requires value\n";
+ }
+ $build = $val;
+ }
+
+ elsif ($opt eq "-l") {
+ $val = shift @ARGV;
+ if (!defined($val)) {
+ die "-l requires value\n";
+ }
+ $tree = $val;
+ }
+
+ elsif ($opt eq "-r") {
+ $reset_bisect = 1;
+ }
+
+ elsif ($opt eq "-h") {
+ usage;
+ }
+
+ else {
+ die "Unknow option $opt\n";
+ }
+}
+
+$build = $tree if (!defined($build));
+
+$tree = expand_path $tree;
+$build = expand_path $build;
+
+if ( ! -d $tree ) {
+ die "$tree not a directory\n";
+}
+
+if ( ! -d $build ) {
+ die "$build not a directory\n";
+}
+
+usage if $#ARGV < 1;
+
+if ($#ARGV == 1) {
+ $start = 1;
+} elsif ($#ARGV == 2) {
+ $val = $ARGV[2];
+ if ($val ne "good" && $val ne "bad") {
+ die "Unknown command '$val', bust be either \"good\" or \"bad\"\n";
+ }
+} else {
+ usage;
+}
+
+my $good_start = expand_path $ARGV[0];
+my $bad_start = expand_path $ARGV[1];
+
+my $good = "$good_start.tmp";
+my $bad = "$bad_start.tmp";
+
+$make = "make";
+
+if ($build ne $tree) {
+ $make = "make O=$build"
+}
+
+$output_config = "$build/.config";
+
+if ($start) {
+ if ( ! -f $good_start ) {
+ die "$good_start not found\n";
+ }
+ if ( ! -f $bad_start ) {
+ die "$bad_start not found\n";
+ }
+ if ( -f $good || -f $bad ) {
+ my $p = "";
+
+ if ( -f $good ) {
+ $p = "$good exists\n";
+ }
+
+ if ( -f $bad ) {
+ $p = "$p$bad exists\n";
+ }
+
+ if (!defined($reset_bisect)) {
+ if (!read_yn "${p}Overwrite and start new bisect anyway?") {
+ exit (-1);
+ }
+ }
+ }
+ run_command "cp $good_start $good" or die "failed to copy to $good\n";
+ run_command "cp $bad_start $bad" or die "faield to copy to $bad\n";
+} else {
+ if ( ! -f $good ) {
+ die "Can not find file $good\n";
+ }
+ if ( ! -f $bad ) {
+ die "Can not find file $bad\n";
+ }
+ if ($val eq "good") {
+ run_command "cp $output_config $good" or die "failed to copy $config to $good\n";
+ } elsif ($val eq "bad") {
+ run_command "cp $output_config $bad" or die "failed to copy $config to $bad\n";
+ }
+}
+
+chdir $tree || die "can't change directory to $tree";
+
+my $ret = config_bisect $good, $bad;
+
+if (!$ret) {
+ exit(0);
+}
+
+if ($ret > 0) {
+ doprint "Cleaning temp files\n";
+ run_command "rm $good";
+ run_command "rm $bad";
+ exit(1);
+} else {
+ doprint "See good and bad configs for details:\n";
+ doprint "good: $good\n";
+ doprint "bad: $bad\n";
+ doprint "%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n";
+}
+exit(2);
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 8809f244bb7c..87af8a68ab25 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -10,6 +10,7 @@ use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
use File::Path qw(mkpath);
use File::Copy qw(cp);
use FileHandle;
+use FindBin;
my $VERSION = "0.2";
@@ -22,6 +23,11 @@ my %evals;
#default opts
my %default = (
+ "MAILER" => "sendmail", # default mailer
+ "EMAIL_ON_ERROR" => 1,
+ "EMAIL_WHEN_FINISHED" => 1,
+ "EMAIL_WHEN_CANCELED" => 0,
+ "EMAIL_WHEN_STARTED" => 0,
"NUM_TESTS" => 1,
"TEST_TYPE" => "build",
"BUILD_TYPE" => "randconfig",
@@ -59,6 +65,7 @@ my %default = (
"GRUB_REBOOT" => "grub2-reboot",
"SYSLINUX" => "extlinux",
"SYSLINUX_PATH" => "/boot/extlinux",
+ "CONNECT_TIMEOUT" => 25,
# required, and we will ask users if they don't have them but we keep the default
# value something that is common.
@@ -163,6 +170,8 @@ my $store_failures;
my $store_successes;
my $test_name;
my $timeout;
+my $connect_timeout;
+my $config_bisect_exec;
my $booted_timeout;
my $detect_triplefault;
my $console;
@@ -204,6 +213,20 @@ my $install_time;
my $reboot_time;
my $test_time;
+my $pwd;
+my $dirname = $FindBin::Bin;
+
+my $mailto;
+my $mailer;
+my $mail_path;
+my $mail_command;
+my $email_on_error;
+my $email_when_finished;
+my $email_when_started;
+my $email_when_canceled;
+
+my $script_start_time = localtime();
+
# set when a test is something other that just building or install
# which would require more options.
my $buildonly = 1;
@@ -229,6 +252,14 @@ my $no_reboot = 1;
my $reboot_success = 0;
my %option_map = (
+ "MAILTO" => \$mailto,
+ "MAILER" => \$mailer,
+ "MAIL_PATH" => \$mail_path,
+ "MAIL_COMMAND" => \$mail_command,
+ "EMAIL_ON_ERROR" => \$email_on_error,
+ "EMAIL_WHEN_FINISHED" => \$email_when_finished,
+ "EMAIL_WHEN_STARTED" => \$email_when_started,
+ "EMAIL_WHEN_CANCELED" => \$email_when_canceled,
"MACHINE" => \$machine,
"SSH_USER" => \$ssh_user,
"TMP_DIR" => \$tmpdir,
@@ -296,6 +327,8 @@ my %option_map = (
"STORE_SUCCESSES" => \$store_successes,
"TEST_NAME" => \$test_name,
"TIMEOUT" => \$timeout,
+ "CONNECT_TIMEOUT" => \$connect_timeout,
+ "CONFIG_BISECT_EXEC" => \$config_bisect_exec,
"BOOTED_TIMEOUT" => \$booted_timeout,
"CONSOLE" => \$console,
"CLOSE_CONSOLE_SIGNAL" => \$close_console_signal,
@@ -337,6 +370,7 @@ my %used_options;
# default variables that can be used
chomp ($variable{"PWD"} = `pwd`);
+$pwd = $variable{"PWD"};
$config_help{"MACHINE"} = << "EOF"
The machine hostname that you will test.
@@ -718,22 +752,14 @@ sub set_value {
my $prvalue = process_variables($rvalue);
- if ($buildonly && $lvalue =~ /^TEST_TYPE(\[.*\])?$/ && $prvalue ne "build") {
+ if ($lvalue =~ /^(TEST|BISECT|CONFIG_BISECT)_TYPE(\[.*\])?$/ &&
+ $prvalue !~ /^(config_|)bisect$/ &&
+ $prvalue !~ /^build$/ &&
+ $buildonly) {
+
# Note if a test is something other than build, then we
# will need other mandatory options.
if ($prvalue ne "install") {
- # for bisect, we need to check BISECT_TYPE
- if ($prvalue ne "bisect") {
- $buildonly = 0;
- }
- } else {
- # install still limits some mandatory options.
- $buildonly = 2;
- }
- }
-
- if ($buildonly && $lvalue =~ /^BISECT_TYPE(\[.*\])?$/ && $prvalue ne "build") {
- if ($prvalue ne "install") {
$buildonly = 0;
} else {
# install still limits some mandatory options.
@@ -1140,7 +1166,8 @@ sub __read_config {
sub get_test_case {
print "What test case would you like to run?\n";
print " (build, install or boot)\n";
- print " Other tests are available but require editing the config file\n";
+ print " Other tests are available but require editing ktest.conf\n";
+ print " (see tools/testing/ktest/sample.conf)\n";
my $ans = <STDIN>;
chomp $ans;
$default{"TEST_TYPE"} = $ans;
@@ -1328,8 +1355,8 @@ sub reboot {
my ($time) = @_;
my $powercycle = 0;
- # test if the machine can be connected to within 5 seconds
- my $stat = run_ssh("echo check machine status", 5);
+ # test if the machine can be connected to within a few seconds
+ my $stat = run_ssh("echo check machine status", $connect_timeout);
if (!$stat) {
doprint("power cycle\n");
$powercycle = 1;
@@ -1404,10 +1431,18 @@ sub do_not_reboot {
return $test_type eq "build" || $no_reboot ||
($test_type eq "patchcheck" && $opt{"PATCHCHECK_TYPE[$i]"} eq "build") ||
- ($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build");
+ ($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build") ||
+ ($test_type eq "config_bisect" && $opt{"CONFIG_BISECT_TYPE[$i]"} eq "build");
}
+my $in_die = 0;
+
sub dodie {
+
+ # avoid recusion
+ return if ($in_die);
+ $in_die = 1;
+
doprint "CRITICAL FAILURE... ", @_, "\n";
my $i = $iteration;
@@ -1426,6 +1461,11 @@ sub dodie {
print " See $opt{LOG_FILE} for more info.\n";
}
+ if ($email_on_error) {
+ send_email("KTEST: critical failure for your [$test_type] test",
+ "Your test started at $script_start_time has failed with:\n@_\n");
+ }
+
if ($monitor_cnt) {
# restore terminal settings
system("stty $stty_orig");
@@ -1477,7 +1517,7 @@ sub exec_console {
close($pts);
exec $console or
- die "Can't open console $console";
+ dodie "Can't open console $console";
}
sub open_console {
@@ -1515,6 +1555,9 @@ sub close_console {
doprint "kill child process $pid\n";
kill $close_console_signal, $pid;
+ doprint "wait for child process $pid to exit\n";
+ waitpid($pid, 0);
+
print "closing!\n";
close($fp);
@@ -1625,7 +1668,7 @@ sub save_logs {
if (!-d $dir) {
mkpath($dir) or
- die "can't create $dir";
+ dodie "can't create $dir";
}
my %files = (
@@ -1638,7 +1681,7 @@ sub save_logs {
while (my ($name, $source) = each(%files)) {
if (-f "$source") {
cp "$source", "$dir/$name" or
- die "failed to copy $source";
+ dodie "failed to copy $source";
}
}
@@ -1692,6 +1735,7 @@ sub run_command {
my $end_time;
my $dolog = 0;
my $dord = 0;
+ my $dostdout = 0;
my $pid;
$command =~ s/\$SSH_USER/$ssh_user/g;
@@ -1710,9 +1754,15 @@ sub run_command {
}
if (defined($redirect)) {
- open (RD, ">$redirect") or
- dodie "failed to write to redirect $redirect";
- $dord = 1;
+ if ($redirect eq 1) {
+ $dostdout = 1;
+ # Have the output of the command on its own line
+ doprint "\n";
+ } else {
+ open (RD, ">$redirect") or
+ dodie "failed to write to redirect $redirect";
+ $dord = 1;
+ }
}
my $hit_timeout = 0;
@@ -1734,6 +1784,7 @@ sub run_command {
}
print LOG $line if ($dolog);
print RD $line if ($dord);
+ print $line if ($dostdout);
}
waitpid($pid, 0);
@@ -1812,7 +1863,7 @@ sub get_grub2_index {
$ssh_grub =~ s,\$SSH_COMMAND,cat $grub_file,g;
open(IN, "$ssh_grub |")
- or die "unable to get $grub_file";
+ or dodie "unable to get $grub_file";
my $found = 0;
@@ -1821,13 +1872,13 @@ sub get_grub2_index {
$grub_number++;
$found = 1;
last;
- } elsif (/^menuentry\s/) {
+ } elsif (/^menuentry\s|^submenu\s/) {
$grub_number++;
}
}
close(IN);
- die "Could not find '$grub_menu' in $grub_file on $machine"
+ dodie "Could not find '$grub_menu' in $grub_file on $machine"
if (!$found);
doprint "$grub_number\n";
$last_grub_menu = $grub_menu;
@@ -1855,7 +1906,7 @@ sub get_grub_index {
$ssh_grub =~ s,\$SSH_COMMAND,cat /boot/grub/menu.lst,g;
open(IN, "$ssh_grub |")
- or die "unable to get menu.lst";
+ or dodie "unable to get menu.lst";
my $found = 0;
@@ -1870,7 +1921,7 @@ sub get_grub_index {
}
close(IN);
- die "Could not find '$grub_menu' in /boot/grub/menu on $machine"
+ dodie "Could not find '$grub_menu' in /boot/grub/menu on $machine"
if (!$found);
doprint "$grub_number\n";
$last_grub_menu = $grub_menu;
@@ -1983,7 +2034,7 @@ sub monitor {
my $full_line = "";
open(DMESG, "> $dmesg") or
- die "unable to write to $dmesg";
+ dodie "unable to write to $dmesg";
reboot_to;
@@ -2862,7 +2913,7 @@ sub run_bisect {
sub update_bisect_replay {
my $tmp_log = "$tmpdir/ktest_bisect_log";
run_command "git bisect log > $tmp_log" or
- die "can't create bisect log";
+ dodie "can't create bisect log";
return $tmp_log;
}
@@ -2871,9 +2922,9 @@ sub bisect {
my $result;
- die "BISECT_GOOD[$i] not defined\n" if (!defined($bisect_good));
- die "BISECT_BAD[$i] not defined\n" if (!defined($bisect_bad));
- die "BISECT_TYPE[$i] not defined\n" if (!defined($bisect_type));
+ dodie "BISECT_GOOD[$i] not defined\n" if (!defined($bisect_good));
+ dodie "BISECT_BAD[$i] not defined\n" if (!defined($bisect_bad));
+ dodie "BISECT_TYPE[$i] not defined\n" if (!defined($bisect_type));
my $good = $bisect_good;
my $bad = $bisect_bad;
@@ -2936,7 +2987,7 @@ sub bisect {
if ($check ne "good") {
doprint "TESTING BISECT BAD [$bad]\n";
run_command "git checkout $bad" or
- die "Failed to checkout $bad";
+ dodie "Failed to checkout $bad";
$result = run_bisect $type;
@@ -2948,7 +2999,7 @@ sub bisect {
if ($check ne "bad") {
doprint "TESTING BISECT GOOD [$good]\n";
run_command "git checkout $good" or
- die "Failed to checkout $good";
+ dodie "Failed to checkout $good";
$result = run_bisect $type;
@@ -2959,7 +3010,7 @@ sub bisect {
# checkout where we started
run_command "git checkout $head" or
- die "Failed to checkout $head";
+ dodie "Failed to checkout $head";
}
run_command "git bisect start$start_files" or
@@ -3092,76 +3143,6 @@ sub create_config {
make_oldconfig;
}
-# compare two config hashes, and return configs with different vals.
-# It returns B's config values, but you can use A to see what A was.
-sub diff_config_vals {
- my ($pa, $pb) = @_;
-
- # crappy Perl way to pass in hashes.
- my %a = %{$pa};
- my %b = %{$pb};
-
- my %ret;
-
- foreach my $item (keys %a) {
- if (defined($b{$item}) && $b{$item} ne $a{$item}) {
- $ret{$item} = $b{$item};
- }
- }
-
- return %ret;
-}
-
-# compare two config hashes and return the configs in B but not A
-sub diff_configs {
- my ($pa, $pb) = @_;
-
- my %ret;
-
- # crappy Perl way to pass in hashes.
- my %a = %{$pa};
- my %b = %{$pb};
-
- foreach my $item (keys %b) {
- if (!defined($a{$item})) {
- $ret{$item} = $b{$item};
- }
- }
-
- return %ret;
-}
-
-# return if two configs are equal or not
-# 0 is equal +1 b has something a does not
-# +1 if a and b have a different item.
-# -1 if a has something b does not
-sub compare_configs {
- my ($pa, $pb) = @_;
-
- my %ret;
-
- # crappy Perl way to pass in hashes.
- my %a = %{$pa};
- my %b = %{$pb};
-
- foreach my $item (keys %b) {
- if (!defined($a{$item})) {
- return 1;
- }
- if ($a{$item} ne $b{$item}) {
- return 1;
- }
- }
-
- foreach my $item (keys %a) {
- if (!defined($b{$item})) {
- return -1;
- }
- }
-
- return 0;
-}
-
sub run_config_bisect_test {
my ($type) = @_;
@@ -3174,166 +3155,57 @@ sub run_config_bisect_test {
return $ret;
}
-sub process_failed {
- my ($config) = @_;
+sub config_bisect_end {
+ my ($good, $bad) = @_;
+ my $diffexec = "diff -u";
+ if (-f "$builddir/scripts/diffconfig") {
+ $diffexec = "$builddir/scripts/diffconfig";
+ }
doprint "\n\n***************************************\n";
- doprint "Found bad config: $config\n";
+ doprint "No more config bisecting possible.\n";
+ run_command "$diffexec $good $bad", 1;
doprint "***************************************\n\n";
}
-# used for config bisecting
-my $good_config;
-my $bad_config;
-
-sub process_new_config {
- my ($tc, $nc, $gc, $bc) = @_;
-
- my %tmp_config = %{$tc};
- my %good_configs = %{$gc};
- my %bad_configs = %{$bc};
-
- my %new_configs;
-
- my $runtest = 1;
- my $ret;
-
- create_config "tmp_configs", \%tmp_config;
- assign_configs \%new_configs, $output_config;
-
- $ret = compare_configs \%new_configs, \%bad_configs;
- if (!$ret) {
- doprint "New config equals bad config, try next test\n";
- $runtest = 0;
- }
-
- if ($runtest) {
- $ret = compare_configs \%new_configs, \%good_configs;
- if (!$ret) {
- doprint "New config equals good config, try next test\n";
- $runtest = 0;
- }
- }
-
- %{$nc} = %new_configs;
-
- return $runtest;
-}
-
sub run_config_bisect {
- my ($pgood, $pbad) = @_;
-
- my $type = $config_bisect_type;
-
- my %good_configs = %{$pgood};
- my %bad_configs = %{$pbad};
-
- my %diff_configs = diff_config_vals \%good_configs, \%bad_configs;
- my %b_configs = diff_configs \%good_configs, \%bad_configs;
- my %g_configs = diff_configs \%bad_configs, \%good_configs;
-
- my @diff_arr = keys %diff_configs;
- my $len_diff = $#diff_arr + 1;
-
- my @b_arr = keys %b_configs;
- my $len_b = $#b_arr + 1;
-
- my @g_arr = keys %g_configs;
- my $len_g = $#g_arr + 1;
-
- my $runtest = 1;
- my %new_configs;
+ my ($good, $bad, $last_result) = @_;
+ my $reset = "";
+ my $cmd;
my $ret;
- # First, lets get it down to a single subset.
- # Is the problem with a difference in values?
- # Is the problem with a missing config?
- # Is the problem with a config that breaks things?
-
- # Enable all of one set and see if we get a new bad
- # or good config.
-
- # first set the good config to the bad values.
-
- doprint "d=$len_diff g=$len_g b=$len_b\n";
-
- # first lets enable things in bad config that are enabled in good config
-
- if ($len_diff > 0) {
- if ($len_b > 0 || $len_g > 0) {
- my %tmp_config = %bad_configs;
-
- doprint "Set tmp config to be bad config with good config values\n";
- foreach my $item (@diff_arr) {
- $tmp_config{$item} = $good_configs{$item};
- }
-
- $runtest = process_new_config \%tmp_config, \%new_configs,
- \%good_configs, \%bad_configs;
- }
+ if (!length($last_result)) {
+ $reset = "-r";
}
+ run_command "$config_bisect_exec $reset -b $outputdir $good $bad $last_result", 1;
- if (!$runtest && $len_diff > 0) {
-
- if ($len_diff == 1) {
- process_failed $diff_arr[0];
- return 1;
- }
- my %tmp_config = %bad_configs;
-
- my $half = int($#diff_arr / 2);
- my @tophalf = @diff_arr[0 .. $half];
-
- doprint "Settings bisect with top half:\n";
- doprint "Set tmp config to be bad config with some good config values\n";
- foreach my $item (@tophalf) {
- $tmp_config{$item} = $good_configs{$item};
- }
-
- $runtest = process_new_config \%tmp_config, \%new_configs,
- \%good_configs, \%bad_configs;
-
- if (!$runtest) {
- my %tmp_config = %bad_configs;
-
- doprint "Try bottom half\n";
-
- my @bottomhalf = @diff_arr[$half+1 .. $#diff_arr];
-
- foreach my $item (@bottomhalf) {
- $tmp_config{$item} = $good_configs{$item};
- }
-
- $runtest = process_new_config \%tmp_config, \%new_configs,
- \%good_configs, \%bad_configs;
- }
+ # config-bisect returns:
+ # 0 if there is more to bisect
+ # 1 for finding a good config
+ # 2 if it can not find any more configs
+ # -1 (255) on error
+ if ($run_command_status) {
+ return $run_command_status;
}
- if ($runtest) {
- $ret = run_config_bisect_test $type;
- if ($ret) {
- doprint "NEW GOOD CONFIG\n";
- %good_configs = %new_configs;
- run_command "mv $good_config ${good_config}.last";
- save_config \%good_configs, $good_config;
- %{$pgood} = %good_configs;
- } else {
- doprint "NEW BAD CONFIG\n";
- %bad_configs = %new_configs;
- run_command "mv $bad_config ${bad_config}.last";
- save_config \%bad_configs, $bad_config;
- %{$pbad} = %bad_configs;
- }
- return 0;
+ $ret = run_config_bisect_test $config_bisect_type;
+ if ($ret) {
+ doprint "NEW GOOD CONFIG\n";
+ # Return 3 for good config
+ return 3;
+ } else {
+ doprint "NEW BAD CONFIG\n";
+ # Return 4 for bad config
+ return 4;
}
-
- fail "Hmm, need to do a mix match?\n";
- return -1;
}
sub config_bisect {
my ($i) = @_;
+ my $good_config;
+ my $bad_config;
+
my $type = $config_bisect_type;
my $ret;
@@ -3353,6 +3225,24 @@ sub config_bisect {
$good_config = $output_config;
}
+ if (!defined($config_bisect_exec)) {
+ # First check the location that ktest.pl ran
+ my @locations = ( "$pwd/config-bisect.pl",
+ "$dirname/config-bisect.pl",
+ "$builddir/tools/testing/ktest/config-bisect.pl",
+ undef );
+ foreach my $loc (@locations) {
+ doprint "loc = $loc\n";
+ $config_bisect_exec = $loc;
+ last if (defined($config_bisect_exec && -x $config_bisect_exec));
+ }
+ if (!defined($config_bisect_exec)) {
+ fail "Could not find an executable config-bisect.pl\n",
+ " Set CONFIG_BISECT_EXEC to point to config-bisect.pl";
+ return 1;
+ }
+ }
+
# we don't want min configs to cause issues here.
doprint "Disabling 'MIN_CONFIG' for this test\n";
undef $minconfig;
@@ -3361,21 +3251,31 @@ sub config_bisect {
my %bad_configs;
my %tmp_configs;
+ if (-f "$tmpdir/good_config.tmp" || -f "$tmpdir/bad_config.tmp") {
+ if (read_yn "Interrupted config-bisect. Continue (n - will start new)?") {
+ if (-f "$tmpdir/good_config.tmp") {
+ $good_config = "$tmpdir/good_config.tmp";
+ } else {
+ $good_config = "$tmpdir/good_config";
+ }
+ if (-f "$tmpdir/bad_config.tmp") {
+ $bad_config = "$tmpdir/bad_config.tmp";
+ } else {
+ $bad_config = "$tmpdir/bad_config";
+ }
+ }
+ }
doprint "Run good configs through make oldconfig\n";
assign_configs \%tmp_configs, $good_config;
create_config "$good_config", \%tmp_configs;
- assign_configs \%good_configs, $output_config;
+ $good_config = "$tmpdir/good_config";
+ system("cp $output_config $good_config") == 0 or dodie "cp good config";
doprint "Run bad configs through make oldconfig\n";
assign_configs \%tmp_configs, $bad_config;
create_config "$bad_config", \%tmp_configs;
- assign_configs \%bad_configs, $output_config;
-
- $good_config = "$tmpdir/good_config";
$bad_config = "$tmpdir/bad_config";
-
- save_config \%good_configs, $good_config;
- save_config \%bad_configs, $bad_config;
+ system("cp $output_config $bad_config") == 0 or dodie "cp bad config";
if (defined($config_bisect_check) && $config_bisect_check ne "0") {
if ($config_bisect_check ne "good") {
@@ -3398,10 +3298,21 @@ sub config_bisect {
}
}
+ my $last_run = "";
+
do {
- $ret = run_config_bisect \%good_configs, \%bad_configs;
+ $ret = run_config_bisect $good_config, $bad_config, $last_run;
+ if ($ret == 3) {
+ $last_run = "good";
+ } elsif ($ret == 4) {
+ $last_run = "bad";
+ }
print_times;
- } while (!$ret);
+ } while ($ret == 3 || $ret == 4);
+
+ if ($ret == 2) {
+ config_bisect_end "$good_config.tmp", "$bad_config.tmp";
+ }
return $ret if ($ret < 0);
@@ -3416,9 +3327,9 @@ sub patchcheck_reboot {
sub patchcheck {
my ($i) = @_;
- die "PATCHCHECK_START[$i] not defined\n"
+ dodie "PATCHCHECK_START[$i] not defined\n"
if (!defined($patchcheck_start));
- die "PATCHCHECK_TYPE[$i] not defined\n"
+ dodie "PATCHCHECK_TYPE[$i] not defined\n"
if (!defined($patchcheck_type));
my $start = $patchcheck_start;
@@ -3432,7 +3343,7 @@ sub patchcheck {
if (defined($patchcheck_end)) {
$end = $patchcheck_end;
} elsif ($cherry) {
- die "PATCHCHECK_END must be defined with PATCHCHECK_CHERRY\n";
+ dodie "PATCHCHECK_END must be defined with PATCHCHECK_CHERRY\n";
}
# Get the true sha1's since we can use things like HEAD~3
@@ -3496,7 +3407,7 @@ sub patchcheck {
doprint "\nProcessing commit \"$item\"\n\n";
run_command "git checkout $sha1" or
- die "Failed to checkout $sha1";
+ dodie "Failed to checkout $sha1";
# only clean on the first and last patch
if ($item eq $list[0] ||
@@ -3587,7 +3498,7 @@ sub read_kconfig {
}
open(KIN, "$kconfig")
- or die "Can't open $kconfig";
+ or dodie "Can't open $kconfig";
while (<KIN>) {
chomp;
@@ -3746,7 +3657,7 @@ sub get_depends {
$dep =~ s/^[^$valid]*[$valid]+//;
} else {
- die "this should never happen";
+ dodie "this should never happen";
}
}
@@ -4007,7 +3918,7 @@ sub make_min_config {
# update new ignore configs
if (defined($ignore_config)) {
open (OUT, ">$temp_config")
- or die "Can't write to $temp_config";
+ or dodie "Can't write to $temp_config";
foreach my $config (keys %save_configs) {
print OUT "$save_configs{$config}\n";
}
@@ -4035,7 +3946,7 @@ sub make_min_config {
# Save off all the current mandatory configs
open (OUT, ">$temp_config")
- or die "Can't write to $temp_config";
+ or dodie "Can't write to $temp_config";
foreach my $config (keys %keep_configs) {
print OUT "$keep_configs{$config}\n";
}
@@ -4222,6 +4133,74 @@ sub set_test_option {
return eval_option($name, $option, $i);
}
+sub find_mailer {
+ my ($mailer) = @_;
+
+ my @paths = split /:/, $ENV{PATH};
+
+ # sendmail is usually in /usr/sbin
+ $paths[$#paths + 1] = "/usr/sbin";
+
+ foreach my $path (@paths) {
+ if (-x "$path/$mailer") {
+ return $path;
+ }
+ }
+
+ return undef;
+}
+
+sub do_send_mail {
+ my ($subject, $message) = @_;
+
+ if (!defined($mail_path)) {
+ # find the mailer
+ $mail_path = find_mailer $mailer;
+ if (!defined($mail_path)) {
+ die "\nCan not find $mailer in PATH\n";
+ }
+ }
+
+ if (!defined($mail_command)) {
+ if ($mailer eq "mail" || $mailer eq "mailx") {
+ $mail_command = "\$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO <<< \'\$MESSAGE\'";
+ } elsif ($mailer eq "sendmail" ) {
+ $mail_command = "echo \'Subject: \$SUBJECT\n\n\$MESSAGE\' | \$MAIL_PATH/\$MAILER -t \$MAILTO";
+ } else {
+ die "\nYour mailer: $mailer is not supported.\n";
+ }
+ }
+
+ $mail_command =~ s/\$MAILER/$mailer/g;
+ $mail_command =~ s/\$MAIL_PATH/$mail_path/g;
+ $mail_command =~ s/\$MAILTO/$mailto/g;
+ $mail_command =~ s/\$SUBJECT/$subject/g;
+ $mail_command =~ s/\$MESSAGE/$message/g;
+
+ run_command $mail_command;
+}
+
+sub send_email {
+
+ if (defined($mailto)) {
+ if (!defined($mailer)) {
+ doprint "No email sent: email or mailer not specified in config.\n";
+ return;
+ }
+ do_send_mail @_;
+ }
+}
+
+sub cancel_test {
+ if ($email_when_canceled) {
+ send_email("KTEST: Your [$test_type] test was cancelled",
+ "Your test started at $script_start_time was cancelled: sig int");
+ }
+ die "\nCaught Sig Int, test interrupted: $!\n"
+}
+
+$SIG{INT} = qw(cancel_test);
+
# First we need to do is the builds
for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
@@ -4245,11 +4224,11 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
$outputdir = set_test_option("OUTPUT_DIR", $i);
$builddir = set_test_option("BUILD_DIR", $i);
- chdir $builddir || die "can't change directory to $builddir";
+ chdir $builddir || dodie "can't change directory to $builddir";
if (!-d $outputdir) {
mkpath($outputdir) or
- die "can't create $outputdir";
+ dodie "can't create $outputdir";
}
$make = "$makecmd O=$outputdir";
@@ -4262,9 +4241,15 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
$start_minconfig_defined = 1;
# The first test may override the PRE_KTEST option
- if (defined($pre_ktest) && $i == 1) {
- doprint "\n";
- run_command $pre_ktest;
+ if ($i == 1) {
+ if (defined($pre_ktest)) {
+ doprint "\n";
+ run_command $pre_ktest;
+ }
+ if ($email_when_started) {
+ send_email("KTEST: Your [$test_type] test was started",
+ "Your test was started on $script_start_time");
+ }
}
# Any test can override the POST_KTEST option
@@ -4280,7 +4265,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
if (!-d $tmpdir) {
mkpath($tmpdir) or
- die "can't create $tmpdir";
+ dodie "can't create $tmpdir";
}
$ENV{"SSH_USER"} = $ssh_user;
@@ -4353,7 +4338,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
if (defined($checkout)) {
run_command "git checkout $checkout" or
- die "failed to checkout $checkout";
+ dodie "failed to checkout $checkout";
}
$no_reboot = 0;
@@ -4428,4 +4413,8 @@ if ($opt{"POWEROFF_ON_SUCCESS"}) {
doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n";
+if ($email_when_finished) {
+ send_email("KTEST: Your [$test_type] test has finished!",
+ "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!");
+}
exit 0;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index 6c58cd8bbbae..6ca6ca0ce695 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -1,6 +1,11 @@
#
# Config file for ktest.pl
#
+# Place your customized version of this, in the working directory that
+# ktest.pl is run from. By default, ktest.pl will look for a file
+# called "ktest.conf", but you can name it anything you like and specify
+# the name of your config file as the first argument of ktest.pl.
+#
# Note, all paths must be absolute
#
@@ -396,6 +401,44 @@
#### Optional Config Options (all have defaults) ####
+# Email options for receiving notifications. Users must setup
+# the specified mailer prior to using this feature.
+#
+# (default undefined)
+#MAILTO =
+#
+# Supported mailers: sendmail, mail, mailx
+# (default sendmail)
+#MAILER = sendmail
+#
+# The executable to run
+# (default: for sendmail "/usr/sbin/sendmail", otherwise equals ${MAILER})
+#MAIL_EXEC = /usr/sbin/sendmail
+#
+# The command used to send mail, which uses the above options
+# can be modified. By default if the mailer is "sendmail" then
+# MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO
+# For mail or mailx:
+# MAIL_COMMAND = "$MAIL_PATH/$MAILER -s \'$SUBJECT\' $MAILTO <<< \'$MESSAGE\'
+# ktest.pl will do the substitution for MAIL_PATH, MAILER, MAILTO at the time
+# it sends the mail if "$FOO" format is used. If "${FOO}" format is used,
+# then the substitutions will occur at the time the config file is read.
+# But note, MAIL_PATH and MAILER require being set by the config file if
+# ${MAIL_PATH} or ${MAILER} are used, but not if $MAIL_PATH or $MAILER are.
+#MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO
+#
+# Errors are defined as those would terminate the script
+# (default 1)
+#EMAIL_ON_ERROR = 1
+# (default 1)
+#EMAIL_WHEN_FINISHED = 1
+# (default 0)
+#EMAIL_WHEN_STARTED = 1
+#
+# Users can cancel the test by Ctrl^C
+# (default 0)
+#EMAIL_WHEN_CANCELED = 1
+
# Start a test setup. If you leave this off, all options
# will be default and the test will run once.
# This is a label and not really an option (it takes no value).
@@ -725,6 +768,13 @@
# (default 120)
#TIMEOUT = 120
+# The timeout in seconds when to test if the box can be rebooted
+# or not. Before issuing the reboot command, a ssh connection
+# is attempted to see if the target machine is still active.
+# If the target does not connect within this timeout, a power cycle
+# is issued instead of a reboot.
+# CONNECT_TIMEOUT = 25
+
# In between tests, a reboot of the box may occur, and this
# is the time to wait for the console after it stops producing
# output. Some machines may not produce a large lag on reboot
@@ -1167,6 +1217,16 @@
# Set it to "good" to test only the good config and set it
# to "bad" to only test the bad config.
#
+# CONFIG_BISECT_EXEC (optional)
+# The config bisect is a separate program that comes with ktest.pl.
+# By befault, it will look for:
+# `pwd`/config-bisect.pl # the location ktest.pl was executed from.
+# If it does not find it there, it will look for:
+# `dirname <ktest.pl>`/config-bisect.pl # The directory that holds ktest.pl
+# If it does not find it there, it will look for:
+# ${BUILD_DIR}/tools/testing/ktest/config-bisect.pl
+# Setting CONFIG_BISECT_EXEC will override where it looks.
+#
# Example:
# TEST_START
# TEST_TYPE = config_bisect
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 620fa78b3b1b..4ea385be528f 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -104,7 +104,8 @@ enum {
NUM_HINTS = 8,
NUM_BDW = NUM_DCR,
NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
- NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */,
+ NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */
+ + 4 /* spa1 iset */ + 1 /* spa11 iset */,
DIMM_SIZE = SZ_32M,
LABEL_SIZE = SZ_128K,
SPA_VCD_SIZE = SZ_4M,
@@ -137,6 +138,7 @@ static u32 handle[] = {
};
static unsigned long dimm_fail_cmd_flags[NUM_DCR];
+static int dimm_fail_cmd_code[NUM_DCR];
struct nfit_test_fw {
enum intel_fw_update_state state;
@@ -153,6 +155,7 @@ struct nfit_test {
void *nfit_buf;
dma_addr_t nfit_dma;
size_t nfit_size;
+ size_t nfit_filled;
int dcr_idx;
int num_dcr;
int num_pm;
@@ -709,7 +712,9 @@ static void smart_notify(struct device *bus_dev,
>= thresh->media_temperature)
|| ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP)
&& smart->ctrl_temperature
- >= thresh->ctrl_temperature)) {
+ >= thresh->ctrl_temperature)
+ || (smart->health != ND_INTEL_SMART_NON_CRITICAL_HEALTH)
+ || (smart->shutdown_state != 0)) {
device_lock(bus_dev);
__acpi_nvdimm_notify(dimm_dev, 0x81);
device_unlock(bus_dev);
@@ -735,6 +740,32 @@ static int nfit_test_cmd_smart_set_threshold(
return 0;
}
+static int nfit_test_cmd_smart_inject(
+ struct nd_intel_smart_inject *inj,
+ unsigned int buf_len,
+ struct nd_intel_smart_threshold *thresh,
+ struct nd_intel_smart *smart,
+ struct device *bus_dev, struct device *dimm_dev)
+{
+ if (buf_len != sizeof(*inj))
+ return -EINVAL;
+
+ if (inj->mtemp_enable)
+ smart->media_temperature = inj->media_temperature;
+ if (inj->spare_enable)
+ smart->spares = inj->spares;
+ if (inj->fatal_enable)
+ smart->health = ND_INTEL_SMART_FATAL_HEALTH;
+ if (inj->unsafe_shutdown_enable) {
+ smart->shutdown_state = 1;
+ smart->shutdown_count++;
+ }
+ inj->status = 0;
+ smart_notify(bus_dev, dimm_dev, smart, thresh);
+
+ return 0;
+}
+
static void uc_error_notify(struct work_struct *work)
{
struct nfit_test *t = container_of(work, typeof(*t), work);
@@ -862,8 +893,11 @@ static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
if (i >= ARRAY_SIZE(handle))
return -ENXIO;
- if ((1 << func) & dimm_fail_cmd_flags[i])
+ if ((1 << func) & dimm_fail_cmd_flags[i]) {
+ if (dimm_fail_cmd_code[i])
+ return dimm_fail_cmd_code[i];
return -EIO;
+ }
return i;
}
@@ -935,6 +969,13 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
t->dcr_idx],
&t->smart[i - t->dcr_idx],
&t->pdev.dev, t->dimm_dev[i]);
+ case ND_INTEL_SMART_INJECT:
+ return nfit_test_cmd_smart_inject(buf,
+ buf_len,
+ &t->smart_threshold[i -
+ t->dcr_idx],
+ &t->smart[i - t->dcr_idx],
+ &t->pdev.dev, t->dimm_dev[i]);
default:
return -ENOTTY;
}
@@ -1125,12 +1166,12 @@ static int ars_state_init(struct device *dev, struct ars_state *ars_state)
static void put_dimms(void *data)
{
- struct device **dimm_dev = data;
+ struct nfit_test *t = data;
int i;
- for (i = 0; i < NUM_DCR; i++)
- if (dimm_dev[i])
- device_unregister(dimm_dev[i]);
+ for (i = 0; i < t->num_dcr; i++)
+ if (t->dimm_dev[i])
+ device_unregister(t->dimm_dev[i]);
}
static struct class *nfit_test_dimm;
@@ -1139,13 +1180,11 @@ static int dimm_name_to_id(struct device *dev)
{
int dimm;
- if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1
- || dimm >= NUM_DCR || dimm < 0)
+ if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1)
return -ENXIO;
return dimm;
}
-
static ssize_t handle_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -1154,7 +1193,7 @@ static ssize_t handle_show(struct device *dev, struct device_attribute *attr,
if (dimm < 0)
return dimm;
- return sprintf(buf, "%#x", handle[dimm]);
+ return sprintf(buf, "%#x\n", handle[dimm]);
}
DEVICE_ATTR_RO(handle);
@@ -1188,8 +1227,39 @@ static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR_RW(fail_cmd);
+static ssize_t fail_cmd_code_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ int dimm = dimm_name_to_id(dev);
+
+ if (dimm < 0)
+ return dimm;
+
+ return sprintf(buf, "%d\n", dimm_fail_cmd_code[dimm]);
+}
+
+static ssize_t fail_cmd_code_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ int dimm = dimm_name_to_id(dev);
+ unsigned long val;
+ ssize_t rc;
+
+ if (dimm < 0)
+ return dimm;
+
+ rc = kstrtol(buf, 0, &val);
+ if (rc)
+ return rc;
+
+ dimm_fail_cmd_code[dimm] = val;
+ return size;
+}
+static DEVICE_ATTR_RW(fail_cmd_code);
+
static struct attribute *nfit_test_dimm_attributes[] = {
&dev_attr_fail_cmd.attr,
+ &dev_attr_fail_cmd_code.attr,
&dev_attr_handle.attr,
NULL,
};
@@ -1203,6 +1273,23 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = {
NULL,
};
+static int nfit_test_dimm_init(struct nfit_test *t)
+{
+ int i;
+
+ if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t))
+ return -ENOMEM;
+ for (i = 0; i < t->num_dcr; i++) {
+ t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm,
+ &t->pdev.dev, 0, NULL,
+ nfit_test_dimm_attribute_groups,
+ "test_dimm%d", i + t->dcr_idx);
+ if (!t->dimm_dev[i])
+ return -ENOMEM;
+ }
+ return 0;
+}
+
static void smart_init(struct nfit_test *t)
{
int i;
@@ -1222,7 +1309,7 @@ static void smart_init(struct nfit_test *t)
| ND_INTEL_SMART_MTEMP_VALID,
.health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
.media_temperature = 23 * 16,
- .ctrl_temperature = 30 * 16,
+ .ctrl_temperature = 25 * 16,
.pmic_temperature = 40 * 16,
.spares = 75,
.alarm_flags = ND_INTEL_SMART_SPARE_TRIP
@@ -1298,17 +1385,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
if (!t->_fit)
return -ENOMEM;
- if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t->dimm_dev))
+ if (nfit_test_dimm_init(t))
return -ENOMEM;
- for (i = 0; i < NUM_DCR; i++) {
- t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm,
- &t->pdev.dev, 0, NULL,
- nfit_test_dimm_attribute_groups,
- "test_dimm%d", i);
- if (!t->dimm_dev[i])
- return -ENOMEM;
- }
-
smart_init(t);
return ars_state_init(&t->pdev.dev, &t->ars_state);
}
@@ -1340,6 +1418,8 @@ static int nfit_test1_alloc(struct nfit_test *t)
if (!t->spa_set[1])
return -ENOMEM;
+ if (nfit_test_dimm_init(t))
+ return -ENOMEM;
smart_init(t);
return ars_state_init(&t->pdev.dev, &t->ars_state);
}
@@ -1366,7 +1446,7 @@ static void nfit_test0_setup(struct nfit_test *t)
struct acpi_nfit_data_region *bdw;
struct acpi_nfit_flush_address *flush;
struct acpi_nfit_capabilities *pcap;
- unsigned int offset, i;
+ unsigned int offset = 0, i;
/*
* spa0 (interleave first half of dimm0 and dimm1, note storage
@@ -1380,93 +1460,102 @@ static void nfit_test0_setup(struct nfit_test *t)
spa->range_index = 0+1;
spa->address = t->spa_set_dma[0];
spa->length = SPA0_SIZE;
+ offset += spa->header.length;
/*
* spa1 (interleave last half of the 4 DIMMS, note storage
* does not actually alias the related block-data-window
* regions)
*/
- spa = nfit_buf + sizeof(*spa);
+ spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
spa->range_index = 1+1;
spa->address = t->spa_set_dma[1];
spa->length = SPA1_SIZE;
+ offset += spa->header.length;
/* spa2 (dcr0) dimm0 */
- spa = nfit_buf + sizeof(*spa) * 2;
+ spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
spa->range_index = 2+1;
spa->address = t->dcr_dma[0];
spa->length = DCR_SIZE;
+ offset += spa->header.length;
/* spa3 (dcr1) dimm1 */
- spa = nfit_buf + sizeof(*spa) * 3;
+ spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
spa->range_index = 3+1;
spa->address = t->dcr_dma[1];
spa->length = DCR_SIZE;
+ offset += spa->header.length;
/* spa4 (dcr2) dimm2 */
- spa = nfit_buf + sizeof(*spa) * 4;
+ spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
spa->range_index = 4+1;
spa->address = t->dcr_dma[2];
spa->length = DCR_SIZE;
+ offset += spa->header.length;
/* spa5 (dcr3) dimm3 */
- spa = nfit_buf + sizeof(*spa) * 5;
+ spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
spa->range_index = 5+1;
spa->address = t->dcr_dma[3];
spa->length = DCR_SIZE;
+ offset += spa->header.length;
/* spa6 (bdw for dcr0) dimm0 */
- spa = nfit_buf + sizeof(*spa) * 6;
+ spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
spa->range_index = 6+1;
spa->address = t->dimm_dma[0];
spa->length = DIMM_SIZE;
+ offset += spa->header.length;
/* spa7 (bdw for dcr1) dimm1 */
- spa = nfit_buf + sizeof(*spa) * 7;
+ spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
spa->range_index = 7+1;
spa->address = t->dimm_dma[1];
spa->length = DIMM_SIZE;
+ offset += spa->header.length;
/* spa8 (bdw for dcr2) dimm2 */
- spa = nfit_buf + sizeof(*spa) * 8;
+ spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
spa->range_index = 8+1;
spa->address = t->dimm_dma[2];
spa->length = DIMM_SIZE;
+ offset += spa->header.length;
/* spa9 (bdw for dcr3) dimm3 */
- spa = nfit_buf + sizeof(*spa) * 9;
+ spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
spa->range_index = 9+1;
spa->address = t->dimm_dma[3];
spa->length = DIMM_SIZE;
+ offset += spa->header.length;
- offset = sizeof(*spa) * 10;
/* mem-region0 (spa0, dimm0) */
memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -1481,9 +1570,10 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->address = 0;
memdev->interleave_index = 0;
memdev->interleave_ways = 2;
+ offset += memdev->header.length;
/* mem-region1 (spa0, dimm1) */
- memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map);
+ memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[1];
@@ -1497,9 +1587,10 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->interleave_index = 0;
memdev->interleave_ways = 2;
memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+ offset += memdev->header.length;
/* mem-region2 (spa1, dimm0) */
- memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2;
+ memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[0];
@@ -1513,9 +1604,10 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->interleave_index = 0;
memdev->interleave_ways = 4;
memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+ offset += memdev->header.length;
/* mem-region3 (spa1, dimm1) */
- memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3;
+ memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[1];
@@ -1528,9 +1620,10 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->address = SPA0_SIZE/2;
memdev->interleave_index = 0;
memdev->interleave_ways = 4;
+ offset += memdev->header.length;
/* mem-region4 (spa1, dimm2) */
- memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 4;
+ memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[2];
@@ -1544,9 +1637,10 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->interleave_index = 0;
memdev->interleave_ways = 4;
memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+ offset += memdev->header.length;
/* mem-region5 (spa1, dimm3) */
- memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5;
+ memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[3];
@@ -1559,9 +1653,10 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->address = SPA0_SIZE/2;
memdev->interleave_index = 0;
memdev->interleave_ways = 4;
+ offset += memdev->header.length;
/* mem-region6 (spa/dcr0, dimm0) */
- memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 6;
+ memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[0];
@@ -1574,9 +1669,10 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->address = 0;
memdev->interleave_index = 0;
memdev->interleave_ways = 1;
+ offset += memdev->header.length;
/* mem-region7 (spa/dcr1, dimm1) */
- memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 7;
+ memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[1];
@@ -1589,9 +1685,10 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->address = 0;
memdev->interleave_index = 0;
memdev->interleave_ways = 1;
+ offset += memdev->header.length;
/* mem-region8 (spa/dcr2, dimm2) */
- memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 8;
+ memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[2];
@@ -1604,9 +1701,10 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->address = 0;
memdev->interleave_index = 0;
memdev->interleave_ways = 1;
+ offset += memdev->header.length;
/* mem-region9 (spa/dcr3, dimm3) */
- memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 9;
+ memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[3];
@@ -1619,9 +1717,10 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->address = 0;
memdev->interleave_index = 0;
memdev->interleave_ways = 1;
+ offset += memdev->header.length;
/* mem-region10 (spa/bdw0, dimm0) */
- memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 10;
+ memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[0];
@@ -1634,9 +1733,10 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->address = 0;
memdev->interleave_index = 0;
memdev->interleave_ways = 1;
+ offset += memdev->header.length;
/* mem-region11 (spa/bdw1, dimm1) */
- memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 11;
+ memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[1];
@@ -1649,9 +1749,10 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->address = 0;
memdev->interleave_index = 0;
memdev->interleave_ways = 1;
+ offset += memdev->header.length;
/* mem-region12 (spa/bdw2, dimm2) */
- memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 12;
+ memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[2];
@@ -1664,9 +1765,10 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->address = 0;
memdev->interleave_index = 0;
memdev->interleave_ways = 1;
+ offset += memdev->header.length;
/* mem-region13 (spa/dcr3, dimm3) */
- memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 13;
+ memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[3];
@@ -1680,12 +1782,12 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->interleave_index = 0;
memdev->interleave_ways = 1;
memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+ offset += memdev->header.length;
- offset = offset + sizeof(struct acpi_nfit_memory_map) * 14;
/* dcr-descriptor0: blk */
dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
- dcr->header.length = sizeof(struct acpi_nfit_control_region);
+ dcr->header.length = sizeof(*dcr);
dcr->region_index = 0+1;
dcr_common_init(dcr);
dcr->serial_number = ~handle[0];
@@ -1696,11 +1798,12 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->command_size = 8;
dcr->status_offset = 8;
dcr->status_size = 4;
+ offset += dcr->header.length;
/* dcr-descriptor1: blk */
- dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region);
+ dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
- dcr->header.length = sizeof(struct acpi_nfit_control_region);
+ dcr->header.length = sizeof(*dcr);
dcr->region_index = 1+1;
dcr_common_init(dcr);
dcr->serial_number = ~handle[1];
@@ -1711,11 +1814,12 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->command_size = 8;
dcr->status_offset = 8;
dcr->status_size = 4;
+ offset += dcr->header.length;
/* dcr-descriptor2: blk */
- dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2;
+ dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
- dcr->header.length = sizeof(struct acpi_nfit_control_region);
+ dcr->header.length = sizeof(*dcr);
dcr->region_index = 2+1;
dcr_common_init(dcr);
dcr->serial_number = ~handle[2];
@@ -1726,11 +1830,12 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->command_size = 8;
dcr->status_offset = 8;
dcr->status_size = 4;
+ offset += dcr->header.length;
/* dcr-descriptor3: blk */
- dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 3;
+ dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
- dcr->header.length = sizeof(struct acpi_nfit_control_region);
+ dcr->header.length = sizeof(*dcr);
dcr->region_index = 3+1;
dcr_common_init(dcr);
dcr->serial_number = ~handle[3];
@@ -1741,8 +1846,8 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->command_size = 8;
dcr->status_offset = 8;
dcr->status_size = 4;
+ offset += dcr->header.length;
- offset = offset + sizeof(struct acpi_nfit_control_region) * 4;
/* dcr-descriptor0: pmem */
dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -1753,10 +1858,10 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->serial_number = ~handle[0];
dcr->code = NFIT_FIC_BYTEN;
dcr->windows = 0;
+ offset += dcr->header.length;
/* dcr-descriptor1: pmem */
- dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
- window_size);
+ dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = offsetof(struct acpi_nfit_control_region,
window_size);
@@ -1765,10 +1870,10 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->serial_number = ~handle[1];
dcr->code = NFIT_FIC_BYTEN;
dcr->windows = 0;
+ offset += dcr->header.length;
/* dcr-descriptor2: pmem */
- dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
- window_size) * 2;
+ dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = offsetof(struct acpi_nfit_control_region,
window_size);
@@ -1777,10 +1882,10 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->serial_number = ~handle[2];
dcr->code = NFIT_FIC_BYTEN;
dcr->windows = 0;
+ offset += dcr->header.length;
/* dcr-descriptor3: pmem */
- dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
- window_size) * 3;
+ dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = offsetof(struct acpi_nfit_control_region,
window_size);
@@ -1789,54 +1894,56 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->serial_number = ~handle[3];
dcr->code = NFIT_FIC_BYTEN;
dcr->windows = 0;
+ offset += dcr->header.length;
- offset = offset + offsetof(struct acpi_nfit_control_region,
- window_size) * 4;
/* bdw0 (spa/dcr0, dimm0) */
bdw = nfit_buf + offset;
bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
- bdw->header.length = sizeof(struct acpi_nfit_data_region);
+ bdw->header.length = sizeof(*bdw);
bdw->region_index = 0+1;
bdw->windows = 1;
bdw->offset = 0;
bdw->size = BDW_SIZE;
bdw->capacity = DIMM_SIZE;
bdw->start_address = 0;
+ offset += bdw->header.length;
/* bdw1 (spa/dcr1, dimm1) */
- bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region);
+ bdw = nfit_buf + offset;
bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
- bdw->header.length = sizeof(struct acpi_nfit_data_region);
+ bdw->header.length = sizeof(*bdw);
bdw->region_index = 1+1;
bdw->windows = 1;
bdw->offset = 0;
bdw->size = BDW_SIZE;
bdw->capacity = DIMM_SIZE;
bdw->start_address = 0;
+ offset += bdw->header.length;
/* bdw2 (spa/dcr2, dimm2) */
- bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 2;
+ bdw = nfit_buf + offset;
bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
- bdw->header.length = sizeof(struct acpi_nfit_data_region);
+ bdw->header.length = sizeof(*bdw);
bdw->region_index = 2+1;
bdw->windows = 1;
bdw->offset = 0;
bdw->size = BDW_SIZE;
bdw->capacity = DIMM_SIZE;
bdw->start_address = 0;
+ offset += bdw->header.length;
/* bdw3 (spa/dcr3, dimm3) */
- bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 3;
+ bdw = nfit_buf + offset;
bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
- bdw->header.length = sizeof(struct acpi_nfit_data_region);
+ bdw->header.length = sizeof(*bdw);
bdw->region_index = 3+1;
bdw->windows = 1;
bdw->offset = 0;
bdw->size = BDW_SIZE;
bdw->capacity = DIMM_SIZE;
bdw->start_address = 0;
+ offset += bdw->header.length;
- offset = offset + sizeof(struct acpi_nfit_data_region) * 4;
/* flush0 (dimm0) */
flush = nfit_buf + offset;
flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
@@ -1845,48 +1952,52 @@ static void nfit_test0_setup(struct nfit_test *t)
flush->hint_count = NUM_HINTS;
for (i = 0; i < NUM_HINTS; i++)
flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64);
+ offset += flush->header.length;
/* flush1 (dimm1) */
- flush = nfit_buf + offset + flush_hint_size * 1;
+ flush = nfit_buf + offset;
flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
flush->header.length = flush_hint_size;
flush->device_handle = handle[1];
flush->hint_count = NUM_HINTS;
for (i = 0; i < NUM_HINTS; i++)
flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64);
+ offset += flush->header.length;
/* flush2 (dimm2) */
- flush = nfit_buf + offset + flush_hint_size * 2;
+ flush = nfit_buf + offset;
flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
flush->header.length = flush_hint_size;
flush->device_handle = handle[2];
flush->hint_count = NUM_HINTS;
for (i = 0; i < NUM_HINTS; i++)
flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64);
+ offset += flush->header.length;
/* flush3 (dimm3) */
- flush = nfit_buf + offset + flush_hint_size * 3;
+ flush = nfit_buf + offset;
flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
flush->header.length = flush_hint_size;
flush->device_handle = handle[3];
flush->hint_count = NUM_HINTS;
for (i = 0; i < NUM_HINTS; i++)
flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);
+ offset += flush->header.length;
/* platform capabilities */
- pcap = nfit_buf + offset + flush_hint_size * 4;
+ pcap = nfit_buf + offset;
pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES;
pcap->header.length = sizeof(*pcap);
pcap->highest_capability = 1;
pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH |
ACPI_NFIT_CAPABILITY_MEM_FLUSH;
+ offset += pcap->header.length;
if (t->setup_hotplug) {
- offset = offset + flush_hint_size * 4 + sizeof(*pcap);
/* dcr-descriptor4: blk */
dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
- dcr->header.length = sizeof(struct acpi_nfit_control_region);
+ dcr->header.length = sizeof(*dcr);
dcr->region_index = 8+1;
dcr_common_init(dcr);
dcr->serial_number = ~handle[4];
@@ -1897,8 +2008,8 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->command_size = 8;
dcr->status_offset = 8;
dcr->status_size = 4;
+ offset += dcr->header.length;
- offset = offset + sizeof(struct acpi_nfit_control_region);
/* dcr-descriptor4: pmem */
dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -1909,21 +2020,20 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->serial_number = ~handle[4];
dcr->code = NFIT_FIC_BYTEN;
dcr->windows = 0;
+ offset += dcr->header.length;
- offset = offset + offsetof(struct acpi_nfit_control_region,
- window_size);
/* bdw4 (spa/dcr4, dimm4) */
bdw = nfit_buf + offset;
bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
- bdw->header.length = sizeof(struct acpi_nfit_data_region);
+ bdw->header.length = sizeof(*bdw);
bdw->region_index = 8+1;
bdw->windows = 1;
bdw->offset = 0;
bdw->size = BDW_SIZE;
bdw->capacity = DIMM_SIZE;
bdw->start_address = 0;
+ offset += bdw->header.length;
- offset = offset + sizeof(struct acpi_nfit_data_region);
/* spa10 (dcr4) dimm4 */
spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
@@ -1932,30 +2042,32 @@ static void nfit_test0_setup(struct nfit_test *t)
spa->range_index = 10+1;
spa->address = t->dcr_dma[4];
spa->length = DCR_SIZE;
+ offset += spa->header.length;
/*
* spa11 (single-dimm interleave for hotplug, note storage
* does not actually alias the related block-data-window
* regions)
*/
- spa = nfit_buf + offset + sizeof(*spa);
+ spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
spa->range_index = 11+1;
spa->address = t->spa_set_dma[2];
spa->length = SPA0_SIZE;
+ offset += spa->header.length;
/* spa12 (bdw for dcr4) dimm4 */
- spa = nfit_buf + offset + sizeof(*spa) * 2;
+ spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
spa->range_index = 12+1;
spa->address = t->dimm_dma[4];
spa->length = DIMM_SIZE;
+ offset += spa->header.length;
- offset = offset + sizeof(*spa) * 3;
/* mem-region14 (spa/dcr4, dimm4) */
memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -1970,10 +2082,10 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->address = 0;
memdev->interleave_index = 0;
memdev->interleave_ways = 1;
+ offset += memdev->header.length;
- /* mem-region15 (spa0, dimm4) */
- memdev = nfit_buf + offset +
- sizeof(struct acpi_nfit_memory_map);
+ /* mem-region15 (spa11, dimm4) */
+ memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[4];
@@ -1987,10 +2099,10 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->interleave_index = 0;
memdev->interleave_ways = 1;
memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+ offset += memdev->header.length;
/* mem-region16 (spa/bdw4, dimm4) */
- memdev = nfit_buf + offset +
- sizeof(struct acpi_nfit_memory_map) * 2;
+ memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
memdev->device_handle = handle[4];
@@ -2003,8 +2115,8 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->address = 0;
memdev->interleave_index = 0;
memdev->interleave_ways = 1;
+ offset += memdev->header.length;
- offset = offset + sizeof(struct acpi_nfit_memory_map) * 3;
/* flush3 (dimm4) */
flush = nfit_buf + offset;
flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
@@ -2014,8 +2126,14 @@ static void nfit_test0_setup(struct nfit_test *t)
for (i = 0; i < NUM_HINTS; i++)
flush->hint_address[i] = t->flush_dma[4]
+ i * sizeof(u64);
+ offset += flush->header.length;
+
+ /* sanity check to make sure we've filled the buffer */
+ WARN_ON(offset != t->nfit_size);
}
+ t->nfit_filled = offset;
+
post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
SPA0_SIZE);
@@ -2026,6 +2144,7 @@ static void nfit_test0_setup(struct nfit_test *t)
set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_SMART_INJECT, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
@@ -2061,17 +2180,18 @@ static void nfit_test1_setup(struct nfit_test *t)
spa->range_index = 0+1;
spa->address = t->spa_set_dma[0];
spa->length = SPA2_SIZE;
+ offset += spa->header.length;
/* virtual cd region */
- spa = nfit_buf + sizeof(*spa);
+ spa = nfit_buf + offset;
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
spa->range_index = 0;
spa->address = t->spa_set_dma[1];
spa->length = SPA_VCD_SIZE;
+ offset += spa->header.length;
- offset += sizeof(*spa) * 2;
/* mem-region0 (spa0, dimm0) */
memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -2089,8 +2209,8 @@ static void nfit_test1_setup(struct nfit_test *t)
memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED
| ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED
| ACPI_NFIT_MEM_NOT_ARMED;
+ offset += memdev->header.length;
- offset += sizeof(*memdev);
/* dcr-descriptor0 */
dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -2101,8 +2221,8 @@ static void nfit_test1_setup(struct nfit_test *t)
dcr->serial_number = ~handle[5];
dcr->code = NFIT_FIC_BYTE;
dcr->windows = 0;
-
offset += dcr->header.length;
+
memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
memdev->header.length = sizeof(*memdev);
@@ -2117,9 +2237,9 @@ static void nfit_test1_setup(struct nfit_test *t)
memdev->interleave_index = 0;
memdev->interleave_ways = 1;
memdev->flags = ACPI_NFIT_MEM_MAP_FAILED;
+ offset += memdev->header.length;
/* dcr-descriptor1 */
- offset += sizeof(*memdev);
dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = offsetof(struct acpi_nfit_control_region,
@@ -2129,6 +2249,12 @@ static void nfit_test1_setup(struct nfit_test *t)
dcr->serial_number = ~handle[6];
dcr->code = NFIT_FIC_BYTE;
dcr->windows = 0;
+ offset += dcr->header.length;
+
+ /* sanity check to make sure we've filled the buffer */
+ WARN_ON(offset != t->nfit_size);
+
+ t->nfit_filled = offset;
post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
SPA2_SIZE);
@@ -2139,6 +2265,9 @@ static void nfit_test1_setup(struct nfit_test *t)
set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
}
static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
@@ -2487,7 +2616,7 @@ static int nfit_test_probe(struct platform_device *pdev)
nd_desc->ndctl = nfit_test_ctl;
rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
- nfit_test->nfit_size);
+ nfit_test->nfit_filled);
if (rc)
return rc;
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index 428344519cdf..33752e06ff8d 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -93,6 +93,7 @@ struct nd_cmd_ars_err_inj_stat {
#define ND_INTEL_FW_FINISH_UPDATE 15
#define ND_INTEL_FW_FINISH_QUERY 16
#define ND_INTEL_SMART_SET_THRESHOLD 17
+#define ND_INTEL_SMART_INJECT 18
#define ND_INTEL_SMART_HEALTH_VALID (1 << 0)
#define ND_INTEL_SMART_SPARES_VALID (1 << 1)
@@ -111,6 +112,10 @@ struct nd_cmd_ars_err_inj_stat {
#define ND_INTEL_SMART_NON_CRITICAL_HEALTH (1 << 0)
#define ND_INTEL_SMART_CRITICAL_HEALTH (1 << 1)
#define ND_INTEL_SMART_FATAL_HEALTH (1 << 2)
+#define ND_INTEL_SMART_INJECT_MTEMP (1 << 0)
+#define ND_INTEL_SMART_INJECT_SPARE (1 << 1)
+#define ND_INTEL_SMART_INJECT_FATAL (1 << 2)
+#define ND_INTEL_SMART_INJECT_SHUTDOWN (1 << 3)
struct nd_intel_smart {
__u32 status;
@@ -158,6 +163,17 @@ struct nd_intel_smart_set_threshold {
__u32 status;
} __packed;
+struct nd_intel_smart_inject {
+ __u64 flags;
+ __u8 mtemp_enable;
+ __u16 media_temperature;
+ __u8 spare_enable;
+ __u8 spares;
+ __u8 fatal_enable;
+ __u8 unsafe_shutdown_enable;
+ __u32 status;
+} __packed;
+
#define INTEL_FW_STORAGE_SIZE 0x100000
#define INTEL_FW_MAX_SEND_LEN 0xFFEC
#define INTEL_FW_QUERY_INTERVAL 250000
diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h
index e3201ccf54c3..32159c08a52e 100644
--- a/tools/testing/radix-tree/linux/gfp.h
+++ b/tools/testing/radix-tree/linux/gfp.h
@@ -19,6 +19,7 @@
#define __GFP_RECLAIM (__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM)
+#define GFP_ZONEMASK 0x0fu
#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM)
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index dbda89c9d9b9..32aafa92074c 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -15,6 +15,7 @@ TARGETS += gpio
TARGETS += intel_pstate
TARGETS += ipc
TARGETS += kcmp
+TARGETS += kvm
TARGETS += lib
TARGETS += membarrier
TARGETS += memfd
@@ -24,6 +25,7 @@ TARGETS += mqueue
TARGETS += net
TARGETS += nsfs
TARGETS += powerpc
+TARGETS += proc
TARGETS += pstore
TARGETS += ptrace
TARGETS += seccomp
@@ -67,6 +69,12 @@ ifndef BUILD
BUILD := $(shell pwd)
endif
+# KSFT_TAP_LEVEL is used from KSFT framework to prevent nested TAP header
+# printing from tests. Applicable to run_tests case where run_tests adds
+# TAP header prior running tests and when a test program invokes another
+# with system() call. Export it here to cover override RUN_TESTS defines.
+export KSFT_TAP_LEVEL=`echo 1`
+
export BUILD
all:
@for TARGET in $(TARGETS); do \
@@ -126,11 +134,14 @@ ifdef INSTALL_PATH
echo "else" >> $(ALL_SCRIPT)
echo " OUTPUT=/dev/stdout" >> $(ALL_SCRIPT)
echo "fi" >> $(ALL_SCRIPT)
+ echo "export KSFT_TAP_LEVEL=`echo 1`" >> $(ALL_SCRIPT)
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- echo "echo ; echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \
+ echo "echo ; echo TAP version 13" >> $(ALL_SCRIPT); \
+ echo "echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \
echo "echo ========================================" >> $(ALL_SCRIPT); \
+ echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \
echo "cd $$TARGET" >> $(ALL_SCRIPT); \
make -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
diff --git a/tools/testing/selftests/android/ion/.gitignore b/tools/testing/selftests/android/ion/.gitignore
index 67e6f391b2a9..95e8f4561474 100644
--- a/tools/testing/selftests/android/ion/.gitignore
+++ b/tools/testing/selftests/android/ion/.gitignore
@@ -1,2 +1,3 @@
ionapp_export
ionapp_import
+ionmap_test
diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile
index 96e0c448b39d..e03695287f76 100644
--- a/tools/testing/selftests/android/ion/Makefile
+++ b/tools/testing/selftests/android/ion/Makefile
@@ -1,8 +1,8 @@
-INCLUDEDIR := -I. -I../../../../../drivers/staging/android/uapi/
+INCLUDEDIR := -I. -I../../../../../drivers/staging/android/uapi/ -I../../../../../usr/include/
CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g
-TEST_GEN_FILES := ionapp_export ionapp_import
+TEST_GEN_FILES := ionapp_export ionapp_import ionmap_test
all: $(TEST_GEN_FILES)
@@ -14,3 +14,4 @@ include ../../lib.mk
$(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c
$(OUTPUT)/ionapp_import: ionapp_import.c ipcsocket.c ionutils.c
+$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c
diff --git a/tools/testing/selftests/android/ion/config b/tools/testing/selftests/android/ion/config
index 19db6ca9aa2b..b4ad748a9dd9 100644
--- a/tools/testing/selftests/android/ion/config
+++ b/tools/testing/selftests/android/ion/config
@@ -2,3 +2,4 @@ CONFIG_ANDROID=y
CONFIG_STAGING=y
CONFIG_ION=y
CONFIG_ION_SYSTEM_HEAP=y
+CONFIG_DRM_VGEM=y
diff --git a/tools/testing/selftests/android/ion/ionmap_test.c b/tools/testing/selftests/android/ion/ionmap_test.c
new file mode 100644
index 000000000000..dab36b06b37d
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionmap_test.c
@@ -0,0 +1,136 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/dma-buf.h>
+
+#include <drm/drm.h>
+
+#include "ion.h"
+#include "ionutils.h"
+
+int check_vgem(int fd)
+{
+ drm_version_t version = { 0 };
+ char name[5];
+ int ret;
+
+ version.name_len = 4;
+ version.name = name;
+
+ ret = ioctl(fd, DRM_IOCTL_VERSION, &version);
+ if (ret)
+ return 1;
+
+ return strcmp(name, "vgem");
+}
+
+int open_vgem(void)
+{
+ int i, fd;
+ const char *drmstr = "/dev/dri/card";
+
+ fd = -1;
+ for (i = 0; i < 16; i++) {
+ char name[80];
+
+ sprintf(name, "%s%u", drmstr, i);
+
+ fd = open(name, O_RDWR);
+ if (fd < 0)
+ continue;
+
+ if (check_vgem(fd)) {
+ close(fd);
+ continue;
+ } else {
+ break;
+ }
+
+ }
+ return fd;
+}
+
+int import_vgem_fd(int vgem_fd, int dma_buf_fd, uint32_t *handle)
+{
+ struct drm_prime_handle import_handle = { 0 };
+ int ret;
+
+ import_handle.fd = dma_buf_fd;
+ import_handle.flags = 0;
+ import_handle.handle = 0;
+
+ ret = ioctl(vgem_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &import_handle);
+ if (ret == 0)
+ *handle = import_handle.handle;
+ return ret;
+}
+
+void close_handle(int vgem_fd, uint32_t handle)
+{
+ struct drm_gem_close close = { 0 };
+
+ close.handle = handle;
+ ioctl(vgem_fd, DRM_IOCTL_GEM_CLOSE, &close);
+}
+
+int main()
+{
+ int ret, vgem_fd;
+ struct ion_buffer_info info;
+ uint32_t handle = 0;
+ struct dma_buf_sync sync = { 0 };
+
+ info.heap_type = ION_HEAP_TYPE_SYSTEM;
+ info.heap_size = 4096;
+ info.flag_type = ION_FLAG_CACHED;
+
+ ret = ion_export_buffer_fd(&info);
+ if (ret < 0) {
+ printf("ion buffer alloc failed\n");
+ return -1;
+ }
+
+ vgem_fd = open_vgem();
+ if (vgem_fd < 0) {
+ ret = vgem_fd;
+ printf("Failed to open vgem\n");
+ goto out_ion;
+ }
+
+ ret = import_vgem_fd(vgem_fd, info.buffd, &handle);
+
+ if (ret < 0) {
+ printf("Failed to import buffer\n");
+ goto out_vgem;
+ }
+
+ sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW;
+ ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
+ if (ret)
+ printf("sync start failed %d\n", errno);
+
+ memset(info.buffer, 0xff, 4096);
+
+ sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
+ ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
+ if (ret)
+ printf("sync end failed %d\n", errno);
+
+ close_handle(vgem_fd, handle);
+ ret = 0;
+
+out_vgem:
+ close(vgem_fd);
+out_ion:
+ ion_close_buffer_fd(&info);
+ printf("done.\n");
+ return ret;
+}
diff --git a/tools/testing/selftests/android/ion/ionutils.c b/tools/testing/selftests/android/ion/ionutils.c
index ce69c14f51fa..7d1d37c4ef6a 100644
--- a/tools/testing/selftests/android/ion/ionutils.c
+++ b/tools/testing/selftests/android/ion/ionutils.c
@@ -80,11 +80,6 @@ int ion_export_buffer_fd(struct ion_buffer_info *ion_info)
heap_id = MAX_HEAP_COUNT + 1;
for (i = 0; i < query.cnt; i++) {
if (heap_data[i].type == ion_info->heap_type) {
- printf("--------------------------------------\n");
- printf("heap type: %d\n", heap_data[i].type);
- printf(" heap id: %d\n", heap_data[i].heap_id);
- printf("heap name: %s\n", heap_data[i].name);
- printf("--------------------------------------\n");
heap_id = heap_data[i].heap_id;
break;
}
@@ -204,7 +199,6 @@ void ion_close_buffer_fd(struct ion_buffer_info *ion_info)
/* Finally, close the client fd */
if (ion_info->ionfd > 0)
close(ion_info->ionfd);
- printf("<%s>: buffer release successfully....\n", __func__);
}
}
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 9cf83f895d98..5e1ab2f0eb79 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -12,3 +12,6 @@ test_tcpbpf_user
test_verifier_log
feature
test_libbpf_open
+test_sock
+test_sock_addr
+urandom_read
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index faadbe233966..4123d0ab90ba 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1108,7 +1108,7 @@ static void test_stacktrace_build_id(void)
assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
== 0);
- assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0);
+ assert(system("./urandom_read") == 0);
/* disable stack trace collection */
key = 0;
val = 1;
@@ -1158,7 +1158,7 @@ static void test_stacktrace_build_id(void)
} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
CHECK(build_id_matches < 1, "build id match",
- "Didn't find expected build ID from the map");
+ "Didn't find expected build ID from the map\n");
disable_pmu:
ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index 73bb20cfb9b7..f4d99fabc56d 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -13,6 +13,7 @@
#include <bpf/bpf.h>
#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index d488f20926e8..2950f80ba7fb 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -15,6 +15,7 @@
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
#define CG_PATH "/foo"
#define CONNECT4_PROG_PATH "./connect4_prog.o"
diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh
index c6e1dcf992c4..9832a875a828 100755
--- a/tools/testing/selftests/bpf/test_sock_addr.sh
+++ b/tools/testing/selftests/bpf/test_sock_addr.sh
@@ -4,7 +4,7 @@ set -eu
ping_once()
{
- ping -q -c 1 -W 1 ${1%%/*} >/dev/null 2>&1
+ ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
}
wait_for_ip()
@@ -13,7 +13,7 @@ wait_for_ip()
echo -n "Wait for testing IPv4/IPv6 to become available "
for _i in $(seq ${MAX_PING_TRIES}); do
echo -n "."
- if ping_once ${TEST_IPv4} && ping_once ${TEST_IPv6}; then
+ if ping_once 4 ${TEST_IPv4} && ping_once 6 ${TEST_IPv6}; then
echo " OK"
return
fi
diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile
index 4e6d09fb166f..5c7d7001ad37 100644
--- a/tools/testing/selftests/filesystems/Makefile
+++ b/tools/testing/selftests/filesystems/Makefile
@@ -1,8 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := dnotify_test devpts_pts
-all: $(TEST_PROGS)
-include ../lib.mk
+TEST_GEN_PROGS := devpts_pts
+TEST_GEN_PROGS_EXTENDED := dnotify_test
-clean:
- rm -fr $(TEST_PROGS)
+include ../lib.mk
diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile
index 826f38d5dd19..261c81f08606 100644
--- a/tools/testing/selftests/firmware/Makefile
+++ b/tools/testing/selftests/firmware/Makefile
@@ -4,6 +4,7 @@
all:
TEST_PROGS := fw_run_tests.sh
+TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_lib.sh
include ../lib.mk
diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh
index 9ea31b57d71a..962d7f4ac627 100755
--- a/tools/testing/selftests/firmware/fw_lib.sh
+++ b/tools/testing/selftests/firmware/fw_lib.sh
@@ -154,11 +154,13 @@ test_finish()
if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout
fi
- if [ "$OLD_FWPATH" = "" ]; then
- OLD_FWPATH=" "
- fi
if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then
- echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path
+ if [ "$OLD_FWPATH" = "" ]; then
+ # A zero-length write won't work; write a null byte
+ printf '\000' >/sys/module/firmware_class/parameters/path
+ else
+ echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path
+ fi
fi
if [ -f $FW ]; then
rm -f "$FW"
diff --git a/tools/testing/selftests/firmware/fw_run_tests.sh b/tools/testing/selftests/firmware/fw_run_tests.sh
index 06d638e9dc62..cffdd4eb0a57 100755
--- a/tools/testing/selftests/firmware/fw_run_tests.sh
+++ b/tools/testing/selftests/firmware/fw_run_tests.sh
@@ -66,5 +66,5 @@ if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then
run_test_config_0003
else
echo "Running basic kernel configuration, working with your config"
- run_test
+ run_tests
fi
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index df3dd7fe5f9b..2a4f16fc9819 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -59,6 +59,13 @@ disable_events() {
echo 0 > events/enable
}
+clear_synthetic_events() { # reset all current synthetic events
+ grep -v ^# synthetic_events |
+ while read line; do
+ echo "!$line" >> synthetic_events
+ done
+}
+
initialize_ftrace() { # Reset ftrace to initial-state
# As the initial state, ftrace will be set to nop tracer,
# no events, no triggers, no filters, no function filters,
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
new file mode 100644
index 000000000000..2aabab363cfb
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
@@ -0,0 +1,39 @@
+#!/bin/sh
+# description: event trigger - test extended error support
+
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test extended error support"
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+! echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null
+if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then
+ fail "Failed to generate extended error in histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
new file mode 100644
index 000000000000..7fd5b4a8f060
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test field variable support
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test field variable support"
+
+echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events
+echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+ fail "Failed to create inter-event histogram"
+fi
+
+if ! grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+ fail "Failed to create histogram with field variable"
+fi
+
+echo '!hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+
+if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+ fail "Failed to remove histogram with field variable"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
new file mode 100644
index 000000000000..c93dbe38b5df
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
@@ -0,0 +1,58 @@
+#!/bin/sh
+# description: event trigger - test inter-event combined histogram trigger
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+clear_synthetic_events
+
+echo "Test create synthetic event"
+
+echo 'waking_latency u64 lat pid_t pid' > synthetic_events
+if [ ! -d events/synthetic/waking_latency ]; then
+ fail "Failed to create waking_latency synthetic event"
+fi
+
+echo "Test combined histogram"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=pid:waking_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).waking_latency($waking_lat,pid) if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' > events/synthetic/waking_latency/trigger
+
+echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events
+echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger
+
+echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then
+ fail "Failed to create combined histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
new file mode 100644
index 000000000000..c193dce611a2
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
@@ -0,0 +1,44 @@
+#!/bin/sh
+# description: event trigger - test multiple actions on hist trigger
+
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test multiple actions on hist trigger"
+echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+TRIGGER1=events/sched/sched_wakeup/trigger
+TRIGGER2=events/sched/sched_switch/trigger
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' > $TRIGGER1
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0 if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid) if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid) if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid if next_comm=="cyclictest"' >> $TRIGGER2
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
new file mode 100644
index 000000000000..e84e7d048566
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch action
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+ fail "Failed to create onmatch action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
new file mode 100644
index 000000000000..7907d8aacde3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch-onmax action
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch-onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if [ ! grep -q "ping" events/synthetic/wakeup_latency/hist -o ! grep -q "max:" events/sched/sched_switch/hist]; then
+ fail "Failed to create onmatch-onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
new file mode 100644
index 000000000000..38b7ed6242b2
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
@@ -0,0 +1,48 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmax action
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+ping localhost -c 3
+if ! grep -q "max:" events/sched/sched_switch/hist; then
+ fail "Failed to create onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
new file mode 100644
index 000000000000..cef11377dcbd
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test synthetic event create remove
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+reset_trigger
+
+echo "Test create synthetic event with an error"
+echo 'wakeup_latency u64 lat pid_t pid char' > synthetic_events > /dev/null
+if [ -d events/synthetic/wakeup_latency ]; then
+ fail "Created wakeup_latency synthetic event with an invalid format"
+fi
+
+reset_trigger
+
+echo "Test remove synthetic event"
+echo '!wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to delete wakeup_latency synthetic event"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index a63e8453984d..8497a376ef9d 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -18,6 +18,10 @@ all:
done
override define RUN_TESTS
+ @export KSFT_TAP_LEVEL=`echo 1`;
+ @echo "TAP version 13";
+ @echo "selftests: futex";
+ @echo "========================================";
@cd $(OUTPUT); ./run.sh
endef
diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile
index 5a3f7d37e912..7340fd6a9a9f 100644
--- a/tools/testing/selftests/intel_pstate/Makefile
+++ b/tools/testing/selftests/intel_pstate/Makefile
@@ -2,7 +2,10 @@
CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
LDLIBS := $(LDLIBS) -lm
-ifeq (,$(filter $(ARCH),x86))
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq (x86,$(ARCH))
TEST_GEN_FILES := msr aperf
endif
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 1a52b03962a3..1b9d8ecdebce 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -57,7 +57,8 @@ static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; }
static inline void ksft_print_header(void)
{
- printf("TAP version 13\n");
+ if (!(getenv("KSFT_TAP_LEVEL")))
+ printf("TAP version 13\n");
}
static inline void ksft_print_cnts(void)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index e81bd28bdd89..6ae3730c4ee3 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -107,6 +107,27 @@
__FILE__, __LINE__, _metadata->name, ##__VA_ARGS__)
/**
+ * XFAIL(statement, fmt, ...)
+ *
+ * @statement: statement to run after reporting XFAIL
+ * @fmt: format string
+ * @...: optional arguments
+ *
+ * This forces a "pass" after reporting a failure with an XFAIL prefix,
+ * and runs "statement", which is usually "return" or "goto skip".
+ */
+#define XFAIL(statement, fmt, ...) do { \
+ if (TH_LOG_ENABLED) { \
+ fprintf(TH_LOG_STREAM, "[ XFAIL! ] " fmt "\n", \
+ ##__VA_ARGS__); \
+ } \
+ /* TODO: find a way to pass xfail to test runner process. */ \
+ _metadata->passed = 1; \
+ _metadata->trigger = 0; \
+ statement; \
+} while (0)
+
+/**
* TEST(test_name) - Defines the test function and creates the registration
* stub
*
@@ -198,7 +219,7 @@
/**
* FIXTURE_SETUP(fixture_name) - Prepares the setup function for the fixture.
- * *_metadata* is included so that ASSERT_* work as a convenience
+ * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
*
* @fixture_name: fixture name
*
@@ -221,6 +242,7 @@
FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
/**
* FIXTURE_TEARDOWN(fixture_name)
+ * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
*
* @fixture_name: fixture name
*
@@ -253,6 +275,8 @@
* Defines a test that depends on a fixture (e.g., is part of a test case).
* Very similar to TEST() except that *self* is the setup instance of fixture's
* datatype exposed for use by the implementation.
+ *
+ * Warning: use of ASSERT_* here will skip TEARDOWN.
*/
/* TODO(wad) register fixtures on dedicated test lists. */
#define TEST_F(fixture_name, test_name) \
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
new file mode 100644
index 000000000000..2ddcc96ae456
--- /dev/null
+++ b/tools/testing/selftests/kvm/Makefile
@@ -0,0 +1,40 @@
+all:
+
+top_srcdir = ../../../../
+UNAME_M := $(shell uname -m)
+
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
+LIBKVM_x86_64 = lib/x86.c lib/vmx.c
+
+TEST_GEN_PROGS_x86_64 = set_sregs_test
+TEST_GEN_PROGS_x86_64 += sync_regs_test
+TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
+
+TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
+LIBKVM += $(LIBKVM_$(UNAME_M))
+
+INSTALL_HDR_PATH = $(top_srcdir)/usr
+LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
+CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D)
+
+# After inclusion, $(OUTPUT) is defined and
+# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
+include ../lib.mk
+
+STATIC_LIBS := $(OUTPUT)/libkvm.a
+LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
+EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS)
+
+x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
+$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
+ $(AR) crs $@ $^
+
+$(LINUX_HDR_PATH):
+ make -C $(top_srcdir) headers_install
+
+all: $(STATIC_LIBS) $(LINUX_HDR_PATH)
+$(TEST_GEN_PROGS): $(STATIC_LIBS)
+$(TEST_GEN_PROGS) $(LIBKVM_OBJ): | $(LINUX_HDR_PATH)
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
new file mode 100644
index 000000000000..637b7017b6ee
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -0,0 +1,145 @@
+/*
+ * tools/testing/selftests/kvm/include/kvm_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+#ifndef SELFTEST_KVM_UTIL_H
+#define SELFTEST_KVM_UTIL_H 1
+
+#include "test_util.h"
+
+#include "asm/kvm.h"
+#include "linux/kvm.h"
+#include <sys/ioctl.h>
+
+#include "sparsebit.h"
+
+/*
+ * Memslots can't cover the gfn starting at this gpa otherwise vCPUs can't be
+ * created. Only applies to VMs using EPT.
+ */
+#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul
+
+
+/* Callers of kvm_util only have an incomplete/opaque description of the
+ * structure kvm_util is using to maintain the state of a VM.
+ */
+struct kvm_vm;
+
+typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
+typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+
+/* Minimum allocated guest virtual and physical addresses */
+#define KVM_UTIL_MIN_VADDR 0x2000
+
+#define DEFAULT_GUEST_PHY_PAGES 512
+#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
+#define DEFAULT_STACK_PGS 5
+
+enum vm_guest_mode {
+ VM_MODE_FLAT48PG,
+};
+
+enum vm_mem_backing_src_type {
+ VM_MEM_SRC_ANONYMOUS,
+ VM_MEM_SRC_ANONYMOUS_THP,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB,
+};
+
+int kvm_check_cap(long cap);
+
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
+void kvm_vm_free(struct kvm_vm *vmp);
+
+int kvm_memcmp_hva_gva(void *hva,
+ struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);
+
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
+ uint32_t data_memslot, uint32_t pgd_memslot);
+
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+void vcpu_dump(FILE *stream, struct kvm_vm *vm,
+ uint32_t vcpuid, uint8_t indent);
+
+void vm_create_irqchip(struct kvm_vm *vm);
+
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags);
+
+void vcpu_ioctl(struct kvm_vm *vm,
+ uint32_t vcpuid, unsigned long ioctl, void *arg);
+void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+ uint32_t data_memslot, uint32_t pgd_memslot);
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
+
+struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_mp_state *mp_state);
+void vcpu_regs_get(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_regs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
+void vcpu_sregs_get(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs);
+void vcpu_sregs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs);
+int _vcpu_sregs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs);
+void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events);
+void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events);
+
+const char *exit_reason_str(unsigned int exit_reason);
+
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint32_t pgd_memslot);
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
+ vm_paddr_t paddr_min, uint32_t memslot);
+
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+void vcpu_set_cpuid(
+ struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid);
+
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
+
+static inline struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_entry(uint32_t function)
+{
+ return kvm_get_supported_cpuid_index(function, 0);
+}
+
+struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code);
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
+
+typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr,
+ vm_paddr_t vmxon_paddr,
+ vm_vaddr_t vmcs_vaddr,
+ vm_paddr_t vmcs_paddr);
+
+struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
+ uint64_t end);
+
+struct kvm_dirty_log *
+allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
+
+int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
+
+#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h
new file mode 100644
index 000000000000..54cfeb6568d3
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/sparsebit.h
@@ -0,0 +1,75 @@
+/*
+ * tools/testing/selftests/kvm/include/sparsebit.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ *
+ * Header file that describes API to the sparsebit library.
+ * This library provides a memory efficient means of storing
+ * the settings of bits indexed via a uint64_t. Memory usage
+ * is reasonable, significantly less than (2^64 / 8) bytes, as
+ * long as bits that are mostly set or mostly cleared are close
+ * to each other. This library is efficient in memory usage
+ * even in the case where most bits are set.
+ */
+
+#ifndef _TEST_SPARSEBIT_H_
+#define _TEST_SPARSEBIT_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct sparsebit;
+typedef uint64_t sparsebit_idx_t;
+typedef uint64_t sparsebit_num_t;
+
+struct sparsebit *sparsebit_alloc(void);
+void sparsebit_free(struct sparsebit **sbitp);
+void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src);
+
+bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_set_num(struct sparsebit *sbit,
+ sparsebit_idx_t idx, sparsebit_num_t num);
+bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_clear_num(struct sparsebit *sbit,
+ sparsebit_idx_t idx, sparsebit_num_t num);
+sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit);
+bool sparsebit_any_set(struct sparsebit *sbit);
+bool sparsebit_any_clear(struct sparsebit *sbit);
+bool sparsebit_all_set(struct sparsebit *sbit);
+bool sparsebit_all_clear(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit,
+ sparsebit_idx_t start, sparsebit_num_t num);
+sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit,
+ sparsebit_idx_t start, sparsebit_num_t num);
+
+void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx);
+void sparsebit_set_num(struct sparsebit *sbitp, sparsebit_idx_t start,
+ sparsebit_num_t num);
+void sparsebit_set_all(struct sparsebit *sbitp);
+
+void sparsebit_clear(struct sparsebit *sbitp, sparsebit_idx_t idx);
+void sparsebit_clear_num(struct sparsebit *sbitp,
+ sparsebit_idx_t start, sparsebit_num_t num);
+void sparsebit_clear_all(struct sparsebit *sbitp);
+
+void sparsebit_dump(FILE *stream, struct sparsebit *sbit,
+ unsigned int indent);
+void sparsebit_validate_internal(struct sparsebit *sbit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TEST_SPARSEBIT_H_ */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
new file mode 100644
index 000000000000..7ab98e41324f
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -0,0 +1,45 @@
+/*
+ * tools/testing/selftests/kvm/include/test_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef TEST_UTIL_H
+#define TEST_UTIL_H 1
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+ssize_t test_write(int fd, const void *buf, size_t count);
+ssize_t test_read(int fd, void *buf, size_t count);
+int test_seq_read(const char *path, char **bufp, size_t *sizep);
+
+void test_assert(bool exp, const char *exp_str,
+ const char *file, unsigned int line, const char *fmt, ...);
+
+#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
+
+#define TEST_ASSERT(e, fmt, ...) \
+ test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
+
+#define ASSERT_EQ(a, b) do { \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ TEST_ASSERT(__a == __b, \
+ "ASSERT_EQ(%s, %s) failed.\n" \
+ "\t%s is %#lx\n" \
+ "\t%s is %#lx", \
+ #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
+} while (0)
+
+#endif /* TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/vmx.h
new file mode 100644
index 000000000000..6ed8499807fd
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/vmx.h
@@ -0,0 +1,494 @@
+/*
+ * tools/testing/selftests/kvm/include/vmx.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef SELFTEST_KVM_VMX_H
+#define SELFTEST_KVM_VMX_H
+
+#include <stdint.h>
+#include "x86.h"
+
+#define CPUID_VMX_BIT 5
+
+#define CPUID_VMX (1 << 5)
+
+/*
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
+#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
+#define CPU_BASED_USE_TSC_OFFSETING 0x00000008
+#define CPU_BASED_HLT_EXITING 0x00000080
+#define CPU_BASED_INVLPG_EXITING 0x00000200
+#define CPU_BASED_MWAIT_EXITING 0x00000400
+#define CPU_BASED_RDPMC_EXITING 0x00000800
+#define CPU_BASED_RDTSC_EXITING 0x00001000
+#define CPU_BASED_CR3_LOAD_EXITING 0x00008000
+#define CPU_BASED_CR3_STORE_EXITING 0x00010000
+#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
+#define CPU_BASED_CR8_STORE_EXITING 0x00100000
+#define CPU_BASED_TPR_SHADOW 0x00200000
+#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000
+#define CPU_BASED_MOV_DR_EXITING 0x00800000
+#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
+#define CPU_BASED_USE_IO_BITMAPS 0x02000000
+#define CPU_BASED_MONITOR_TRAP 0x08000000
+#define CPU_BASED_USE_MSR_BITMAPS 0x10000000
+#define CPU_BASED_MONITOR_EXITING 0x20000000
+#define CPU_BASED_PAUSE_EXITING 0x40000000
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000
+
+#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172
+
+/*
+ * Definitions of Secondary Processor-Based VM-Execution Controls.
+ */
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
+#define SECONDARY_EXEC_DESC 0x00000004
+#define SECONDARY_EXEC_RDTSCP 0x00000008
+#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
+#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
+#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
+#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
+#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800
+#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
+#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000
+#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
+#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000
+#define SECONDARY_EXEC_ENABLE_PML 0x00020000
+#define SECONDARY_EPT_VE 0x00040000
+#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000
+#define SECONDARY_EXEC_TSC_SCALING 0x02000000
+
+#define PIN_BASED_EXT_INTR_MASK 0x00000001
+#define PIN_BASED_NMI_EXITING 0x00000008
+#define PIN_BASED_VIRTUAL_NMIS 0x00000020
+#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040
+#define PIN_BASED_POSTED_INTR 0x00000080
+
+#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016
+
+#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004
+#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
+#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000
+#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
+#define VM_EXIT_SAVE_IA32_PAT 0x00040000
+#define VM_EXIT_LOAD_IA32_PAT 0x00080000
+#define VM_EXIT_SAVE_IA32_EFER 0x00100000
+#define VM_EXIT_LOAD_IA32_EFER 0x00200000
+#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
+
+#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
+
+#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004
+#define VM_ENTRY_IA32E_MODE 0x00000200
+#define VM_ENTRY_SMM 0x00000400
+#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
+#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000
+#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
+#define VM_ENTRY_LOAD_IA32_EFER 0x00008000
+
+#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
+
+#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
+#define VMX_MISC_SAVE_EFER_LMA 0x00000020
+
+#define EXIT_REASON_FAILED_VMENTRY 0x80000000
+#define EXIT_REASON_EXCEPTION_NMI 0
+#define EXIT_REASON_EXTERNAL_INTERRUPT 1
+#define EXIT_REASON_TRIPLE_FAULT 2
+#define EXIT_REASON_PENDING_INTERRUPT 7
+#define EXIT_REASON_NMI_WINDOW 8
+#define EXIT_REASON_TASK_SWITCH 9
+#define EXIT_REASON_CPUID 10
+#define EXIT_REASON_HLT 12
+#define EXIT_REASON_INVD 13
+#define EXIT_REASON_INVLPG 14
+#define EXIT_REASON_RDPMC 15
+#define EXIT_REASON_RDTSC 16
+#define EXIT_REASON_VMCALL 18
+#define EXIT_REASON_VMCLEAR 19
+#define EXIT_REASON_VMLAUNCH 20
+#define EXIT_REASON_VMPTRLD 21
+#define EXIT_REASON_VMPTRST 22
+#define EXIT_REASON_VMREAD 23
+#define EXIT_REASON_VMRESUME 24
+#define EXIT_REASON_VMWRITE 25
+#define EXIT_REASON_VMOFF 26
+#define EXIT_REASON_VMON 27
+#define EXIT_REASON_CR_ACCESS 28
+#define EXIT_REASON_DR_ACCESS 29
+#define EXIT_REASON_IO_INSTRUCTION 30
+#define EXIT_REASON_MSR_READ 31
+#define EXIT_REASON_MSR_WRITE 32
+#define EXIT_REASON_INVALID_STATE 33
+#define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION 40
+#define EXIT_REASON_MCE_DURING_VMENTRY 41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_EOI_INDUCED 45
+#define EXIT_REASON_EPT_VIOLATION 48
+#define EXIT_REASON_EPT_MISCONFIG 49
+#define EXIT_REASON_INVEPT 50
+#define EXIT_REASON_RDTSCP 51
+#define EXIT_REASON_PREEMPTION_TIMER 52
+#define EXIT_REASON_INVVPID 53
+#define EXIT_REASON_WBINVD 54
+#define EXIT_REASON_XSETBV 55
+#define EXIT_REASON_APIC_WRITE 56
+#define EXIT_REASON_INVPCID 58
+#define EXIT_REASON_PML_FULL 62
+#define EXIT_REASON_XSAVES 63
+#define EXIT_REASON_XRSTORS 64
+#define LAST_EXIT_REASON 64
+
+enum vmcs_field {
+ VIRTUAL_PROCESSOR_ID = 0x00000000,
+ POSTED_INTR_NV = 0x00000002,
+ GUEST_ES_SELECTOR = 0x00000800,
+ GUEST_CS_SELECTOR = 0x00000802,
+ GUEST_SS_SELECTOR = 0x00000804,
+ GUEST_DS_SELECTOR = 0x00000806,
+ GUEST_FS_SELECTOR = 0x00000808,
+ GUEST_GS_SELECTOR = 0x0000080a,
+ GUEST_LDTR_SELECTOR = 0x0000080c,
+ GUEST_TR_SELECTOR = 0x0000080e,
+ GUEST_INTR_STATUS = 0x00000810,
+ GUEST_PML_INDEX = 0x00000812,
+ HOST_ES_SELECTOR = 0x00000c00,
+ HOST_CS_SELECTOR = 0x00000c02,
+ HOST_SS_SELECTOR = 0x00000c04,
+ HOST_DS_SELECTOR = 0x00000c06,
+ HOST_FS_SELECTOR = 0x00000c08,
+ HOST_GS_SELECTOR = 0x00000c0a,
+ HOST_TR_SELECTOR = 0x00000c0c,
+ IO_BITMAP_A = 0x00002000,
+ IO_BITMAP_A_HIGH = 0x00002001,
+ IO_BITMAP_B = 0x00002002,
+ IO_BITMAP_B_HIGH = 0x00002003,
+ MSR_BITMAP = 0x00002004,
+ MSR_BITMAP_HIGH = 0x00002005,
+ VM_EXIT_MSR_STORE_ADDR = 0x00002006,
+ VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007,
+ VM_EXIT_MSR_LOAD_ADDR = 0x00002008,
+ VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
+ VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
+ VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
+ PML_ADDRESS = 0x0000200e,
+ PML_ADDRESS_HIGH = 0x0000200f,
+ TSC_OFFSET = 0x00002010,
+ TSC_OFFSET_HIGH = 0x00002011,
+ VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
+ VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
+ APIC_ACCESS_ADDR = 0x00002014,
+ APIC_ACCESS_ADDR_HIGH = 0x00002015,
+ POSTED_INTR_DESC_ADDR = 0x00002016,
+ POSTED_INTR_DESC_ADDR_HIGH = 0x00002017,
+ EPT_POINTER = 0x0000201a,
+ EPT_POINTER_HIGH = 0x0000201b,
+ EOI_EXIT_BITMAP0 = 0x0000201c,
+ EOI_EXIT_BITMAP0_HIGH = 0x0000201d,
+ EOI_EXIT_BITMAP1 = 0x0000201e,
+ EOI_EXIT_BITMAP1_HIGH = 0x0000201f,
+ EOI_EXIT_BITMAP2 = 0x00002020,
+ EOI_EXIT_BITMAP2_HIGH = 0x00002021,
+ EOI_EXIT_BITMAP3 = 0x00002022,
+ EOI_EXIT_BITMAP3_HIGH = 0x00002023,
+ VMREAD_BITMAP = 0x00002026,
+ VMREAD_BITMAP_HIGH = 0x00002027,
+ VMWRITE_BITMAP = 0x00002028,
+ VMWRITE_BITMAP_HIGH = 0x00002029,
+ XSS_EXIT_BITMAP = 0x0000202C,
+ XSS_EXIT_BITMAP_HIGH = 0x0000202D,
+ TSC_MULTIPLIER = 0x00002032,
+ TSC_MULTIPLIER_HIGH = 0x00002033,
+ GUEST_PHYSICAL_ADDRESS = 0x00002400,
+ GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
+ VMCS_LINK_POINTER = 0x00002800,
+ VMCS_LINK_POINTER_HIGH = 0x00002801,
+ GUEST_IA32_DEBUGCTL = 0x00002802,
+ GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
+ GUEST_IA32_PAT = 0x00002804,
+ GUEST_IA32_PAT_HIGH = 0x00002805,
+ GUEST_IA32_EFER = 0x00002806,
+ GUEST_IA32_EFER_HIGH = 0x00002807,
+ GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
+ GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809,
+ GUEST_PDPTR0 = 0x0000280a,
+ GUEST_PDPTR0_HIGH = 0x0000280b,
+ GUEST_PDPTR1 = 0x0000280c,
+ GUEST_PDPTR1_HIGH = 0x0000280d,
+ GUEST_PDPTR2 = 0x0000280e,
+ GUEST_PDPTR2_HIGH = 0x0000280f,
+ GUEST_PDPTR3 = 0x00002810,
+ GUEST_PDPTR3_HIGH = 0x00002811,
+ GUEST_BNDCFGS = 0x00002812,
+ GUEST_BNDCFGS_HIGH = 0x00002813,
+ HOST_IA32_PAT = 0x00002c00,
+ HOST_IA32_PAT_HIGH = 0x00002c01,
+ HOST_IA32_EFER = 0x00002c02,
+ HOST_IA32_EFER_HIGH = 0x00002c03,
+ HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
+ HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05,
+ PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
+ CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
+ EXCEPTION_BITMAP = 0x00004004,
+ PAGE_FAULT_ERROR_CODE_MASK = 0x00004006,
+ PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008,
+ CR3_TARGET_COUNT = 0x0000400a,
+ VM_EXIT_CONTROLS = 0x0000400c,
+ VM_EXIT_MSR_STORE_COUNT = 0x0000400e,
+ VM_EXIT_MSR_LOAD_COUNT = 0x00004010,
+ VM_ENTRY_CONTROLS = 0x00004012,
+ VM_ENTRY_MSR_LOAD_COUNT = 0x00004014,
+ VM_ENTRY_INTR_INFO_FIELD = 0x00004016,
+ VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018,
+ VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
+ TPR_THRESHOLD = 0x0000401c,
+ SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
+ PLE_GAP = 0x00004020,
+ PLE_WINDOW = 0x00004022,
+ VM_INSTRUCTION_ERROR = 0x00004400,
+ VM_EXIT_REASON = 0x00004402,
+ VM_EXIT_INTR_INFO = 0x00004404,
+ VM_EXIT_INTR_ERROR_CODE = 0x00004406,
+ IDT_VECTORING_INFO_FIELD = 0x00004408,
+ IDT_VECTORING_ERROR_CODE = 0x0000440a,
+ VM_EXIT_INSTRUCTION_LEN = 0x0000440c,
+ VMX_INSTRUCTION_INFO = 0x0000440e,
+ GUEST_ES_LIMIT = 0x00004800,
+ GUEST_CS_LIMIT = 0x00004802,
+ GUEST_SS_LIMIT = 0x00004804,
+ GUEST_DS_LIMIT = 0x00004806,
+ GUEST_FS_LIMIT = 0x00004808,
+ GUEST_GS_LIMIT = 0x0000480a,
+ GUEST_LDTR_LIMIT = 0x0000480c,
+ GUEST_TR_LIMIT = 0x0000480e,
+ GUEST_GDTR_LIMIT = 0x00004810,
+ GUEST_IDTR_LIMIT = 0x00004812,
+ GUEST_ES_AR_BYTES = 0x00004814,
+ GUEST_CS_AR_BYTES = 0x00004816,
+ GUEST_SS_AR_BYTES = 0x00004818,
+ GUEST_DS_AR_BYTES = 0x0000481a,
+ GUEST_FS_AR_BYTES = 0x0000481c,
+ GUEST_GS_AR_BYTES = 0x0000481e,
+ GUEST_LDTR_AR_BYTES = 0x00004820,
+ GUEST_TR_AR_BYTES = 0x00004822,
+ GUEST_INTERRUPTIBILITY_INFO = 0x00004824,
+ GUEST_ACTIVITY_STATE = 0X00004826,
+ GUEST_SYSENTER_CS = 0x0000482A,
+ VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
+ HOST_IA32_SYSENTER_CS = 0x00004c00,
+ CR0_GUEST_HOST_MASK = 0x00006000,
+ CR4_GUEST_HOST_MASK = 0x00006002,
+ CR0_READ_SHADOW = 0x00006004,
+ CR4_READ_SHADOW = 0x00006006,
+ CR3_TARGET_VALUE0 = 0x00006008,
+ CR3_TARGET_VALUE1 = 0x0000600a,
+ CR3_TARGET_VALUE2 = 0x0000600c,
+ CR3_TARGET_VALUE3 = 0x0000600e,
+ EXIT_QUALIFICATION = 0x00006400,
+ GUEST_LINEAR_ADDRESS = 0x0000640a,
+ GUEST_CR0 = 0x00006800,
+ GUEST_CR3 = 0x00006802,
+ GUEST_CR4 = 0x00006804,
+ GUEST_ES_BASE = 0x00006806,
+ GUEST_CS_BASE = 0x00006808,
+ GUEST_SS_BASE = 0x0000680a,
+ GUEST_DS_BASE = 0x0000680c,
+ GUEST_FS_BASE = 0x0000680e,
+ GUEST_GS_BASE = 0x00006810,
+ GUEST_LDTR_BASE = 0x00006812,
+ GUEST_TR_BASE = 0x00006814,
+ GUEST_GDTR_BASE = 0x00006816,
+ GUEST_IDTR_BASE = 0x00006818,
+ GUEST_DR7 = 0x0000681a,
+ GUEST_RSP = 0x0000681c,
+ GUEST_RIP = 0x0000681e,
+ GUEST_RFLAGS = 0x00006820,
+ GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822,
+ GUEST_SYSENTER_ESP = 0x00006824,
+ GUEST_SYSENTER_EIP = 0x00006826,
+ HOST_CR0 = 0x00006c00,
+ HOST_CR3 = 0x00006c02,
+ HOST_CR4 = 0x00006c04,
+ HOST_FS_BASE = 0x00006c06,
+ HOST_GS_BASE = 0x00006c08,
+ HOST_TR_BASE = 0x00006c0a,
+ HOST_GDTR_BASE = 0x00006c0c,
+ HOST_IDTR_BASE = 0x00006c0e,
+ HOST_IA32_SYSENTER_ESP = 0x00006c10,
+ HOST_IA32_SYSENTER_EIP = 0x00006c12,
+ HOST_RSP = 0x00006c14,
+ HOST_RIP = 0x00006c16,
+};
+
+struct vmx_msr_entry {
+ uint32_t index;
+ uint32_t reserved;
+ uint64_t value;
+} __attribute__ ((aligned(16)));
+
+static inline int vmxon(uint64_t phys)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(phys)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline void vmxoff(void)
+{
+ __asm__ __volatile__("vmxoff");
+}
+
+static inline int vmclear(uint64_t vmcs_pa)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(vmcs_pa)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline int vmptrld(uint64_t vmcs_pa)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(vmcs_pa)
+ : "cc", "memory");
+
+ return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmlaunch.
+ */
+static inline int vmlaunch(void)
+{
+ int ret;
+
+ __asm__ __volatile__("push %%rbp;"
+ "push %%rcx;"
+ "push %%rdx;"
+ "push %%rsi;"
+ "push %%rdi;"
+ "push $0;"
+ "vmwrite %%rsp, %[host_rsp];"
+ "lea 1f(%%rip), %%rax;"
+ "vmwrite %%rax, %[host_rip];"
+ "vmlaunch;"
+ "incq (%%rsp);"
+ "1: pop %%rax;"
+ "pop %%rdi;"
+ "pop %%rsi;"
+ "pop %%rdx;"
+ "pop %%rcx;"
+ "pop %%rbp;"
+ : [ret]"=&a"(ret)
+ : [host_rsp]"r"((uint64_t)HOST_RSP),
+ [host_rip]"r"((uint64_t)HOST_RIP)
+ : "memory", "cc", "rbx", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15");
+ return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmresume.
+ */
+static inline int vmresume(void)
+{
+ int ret;
+
+ __asm__ __volatile__("push %%rbp;"
+ "push %%rcx;"
+ "push %%rdx;"
+ "push %%rsi;"
+ "push %%rdi;"
+ "push $0;"
+ "vmwrite %%rsp, %[host_rsp];"
+ "lea 1f(%%rip), %%rax;"
+ "vmwrite %%rax, %[host_rip];"
+ "vmresume;"
+ "incq (%%rsp);"
+ "1: pop %%rax;"
+ "pop %%rdi;"
+ "pop %%rsi;"
+ "pop %%rdx;"
+ "pop %%rcx;"
+ "pop %%rbp;"
+ : [ret]"=&a"(ret)
+ : [host_rsp]"r"((uint64_t)HOST_RSP),
+ [host_rip]"r"((uint64_t)HOST_RIP)
+ : "memory", "cc", "rbx", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15");
+ return ret;
+}
+
+static inline int vmread(uint64_t encoding, uint64_t *value)
+{
+ uint64_t tmp;
+ uint8_t ret;
+
+ __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
+ : [value]"=rm"(tmp), [ret]"=rm"(ret)
+ : [encoding]"r"(encoding)
+ : "cc", "memory");
+
+ *value = tmp;
+ return ret;
+}
+
+/*
+ * A wrapper around vmread that ignores errors and returns zero if the
+ * vmread instruction fails.
+ */
+static inline uint64_t vmreadz(uint64_t encoding)
+{
+ uint64_t value = 0;
+ vmread(encoding, &value);
+ return value;
+}
+
+static inline int vmwrite(uint64_t encoding, uint64_t value)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [value]"rm"(value), [encoding]"r"(encoding)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline uint32_t vmcs_revision(void)
+{
+ return rdmsr(MSR_IA32_VMX_BASIC);
+}
+
+void prepare_for_vmx_operation(void);
+void prepare_vmcs(void *guest_rip, void *guest_rsp);
+struct kvm_vm *vm_create_default_vmx(uint32_t vcpuid,
+ vmx_guest_code_t guest_code);
+
+#endif /* !SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86.h
new file mode 100644
index 000000000000..4a5b2c4c1a0f
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86.h
@@ -0,0 +1,1043 @@
+/*
+ * tools/testing/selftests/kvm/include/x86.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef SELFTEST_KVM_X86_H
+#define SELFTEST_KVM_X86_H
+
+#include <assert.h>
+#include <stdint.h>
+
+#define X86_EFLAGS_FIXED (1u << 1)
+
+#define X86_CR4_VME (1ul << 0)
+#define X86_CR4_PVI (1ul << 1)
+#define X86_CR4_TSD (1ul << 2)
+#define X86_CR4_DE (1ul << 3)
+#define X86_CR4_PSE (1ul << 4)
+#define X86_CR4_PAE (1ul << 5)
+#define X86_CR4_MCE (1ul << 6)
+#define X86_CR4_PGE (1ul << 7)
+#define X86_CR4_PCE (1ul << 8)
+#define X86_CR4_OSFXSR (1ul << 9)
+#define X86_CR4_OSXMMEXCPT (1ul << 10)
+#define X86_CR4_UMIP (1ul << 11)
+#define X86_CR4_VMXE (1ul << 13)
+#define X86_CR4_SMXE (1ul << 14)
+#define X86_CR4_FSGSBASE (1ul << 16)
+#define X86_CR4_PCIDE (1ul << 17)
+#define X86_CR4_OSXSAVE (1ul << 18)
+#define X86_CR4_SMEP (1ul << 20)
+#define X86_CR4_SMAP (1ul << 21)
+#define X86_CR4_PKE (1ul << 22)
+
+/* The enum values match the intruction encoding of each register */
+enum x86_register {
+ RAX = 0,
+ RCX,
+ RDX,
+ RBX,
+ RSP,
+ RBP,
+ RSI,
+ RDI,
+ R8,
+ R9,
+ R10,
+ R11,
+ R12,
+ R13,
+ R14,
+ R15,
+};
+
+struct desc64 {
+ uint16_t limit0;
+ uint16_t base0;
+ unsigned base1:8, type:5, dpl:2, p:1;
+ unsigned limit1:4, zero0:3, g:1, base2:8;
+ uint32_t base3;
+ uint32_t zero1;
+} __attribute__((packed));
+
+struct desc_ptr {
+ uint16_t size;
+ uint64_t address;
+} __attribute__((packed));
+
+static inline uint64_t get_desc64_base(const struct desc64 *desc)
+{
+ return ((uint64_t)desc->base3 << 32) |
+ (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
+}
+
+static inline uint64_t rdtsc(void)
+{
+ uint32_t eax, edx;
+
+ /*
+ * The lfence is to wait (on Intel CPUs) until all previous
+ * instructions have been executed.
+ */
+ __asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
+ return ((uint64_t)edx) << 32 | eax;
+}
+
+static inline uint64_t rdtscp(uint32_t *aux)
+{
+ uint32_t eax, edx;
+
+ __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
+ return ((uint64_t)edx) << 32 | eax;
+}
+
+static inline uint64_t rdmsr(uint32_t msr)
+{
+ uint32_t a, d;
+
+ __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+ return a | ((uint64_t) d << 32);
+}
+
+static inline void wrmsr(uint32_t msr, uint64_t value)
+{
+ uint32_t a = value;
+ uint32_t d = value >> 32;
+
+ __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+
+static inline uint16_t inw(uint16_t port)
+{
+ uint16_t tmp;
+
+ __asm__ __volatile__("in %%dx, %%ax"
+ : /* output */ "=a" (tmp)
+ : /* input */ "d" (port));
+
+ return tmp;
+}
+
+static inline uint16_t get_es(void)
+{
+ uint16_t es;
+
+ __asm__ __volatile__("mov %%es, %[es]"
+ : /* output */ [es]"=rm"(es));
+ return es;
+}
+
+static inline uint16_t get_cs(void)
+{
+ uint16_t cs;
+
+ __asm__ __volatile__("mov %%cs, %[cs]"
+ : /* output */ [cs]"=rm"(cs));
+ return cs;
+}
+
+static inline uint16_t get_ss(void)
+{
+ uint16_t ss;
+
+ __asm__ __volatile__("mov %%ss, %[ss]"
+ : /* output */ [ss]"=rm"(ss));
+ return ss;
+}
+
+static inline uint16_t get_ds(void)
+{
+ uint16_t ds;
+
+ __asm__ __volatile__("mov %%ds, %[ds]"
+ : /* output */ [ds]"=rm"(ds));
+ return ds;
+}
+
+static inline uint16_t get_fs(void)
+{
+ uint16_t fs;
+
+ __asm__ __volatile__("mov %%fs, %[fs]"
+ : /* output */ [fs]"=rm"(fs));
+ return fs;
+}
+
+static inline uint16_t get_gs(void)
+{
+ uint16_t gs;
+
+ __asm__ __volatile__("mov %%gs, %[gs]"
+ : /* output */ [gs]"=rm"(gs));
+ return gs;
+}
+
+static inline uint16_t get_tr(void)
+{
+ uint16_t tr;
+
+ __asm__ __volatile__("str %[tr]"
+ : /* output */ [tr]"=rm"(tr));
+ return tr;
+}
+
+static inline uint64_t get_cr0(void)
+{
+ uint64_t cr0;
+
+ __asm__ __volatile__("mov %%cr0, %[cr0]"
+ : /* output */ [cr0]"=r"(cr0));
+ return cr0;
+}
+
+static inline uint64_t get_cr3(void)
+{
+ uint64_t cr3;
+
+ __asm__ __volatile__("mov %%cr3, %[cr3]"
+ : /* output */ [cr3]"=r"(cr3));
+ return cr3;
+}
+
+static inline uint64_t get_cr4(void)
+{
+ uint64_t cr4;
+
+ __asm__ __volatile__("mov %%cr4, %[cr4]"
+ : /* output */ [cr4]"=r"(cr4));
+ return cr4;
+}
+
+static inline void set_cr4(uint64_t val)
+{
+ __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
+}
+
+static inline uint64_t get_gdt_base(void)
+{
+ struct desc_ptr gdt;
+ __asm__ __volatile__("sgdt %[gdt]"
+ : /* output */ [gdt]"=m"(gdt));
+ return gdt.address;
+}
+
+static inline uint64_t get_idt_base(void)
+{
+ struct desc_ptr idt;
+ __asm__ __volatile__("sidt %[idt]"
+ : /* output */ [idt]"=m"(idt));
+ return idt.address;
+}
+
+#define SET_XMM(__var, __xmm) \
+ asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
+
+static inline void set_xmm(int n, unsigned long val)
+{
+ switch (n) {
+ case 0:
+ SET_XMM(val, xmm0);
+ break;
+ case 1:
+ SET_XMM(val, xmm1);
+ break;
+ case 2:
+ SET_XMM(val, xmm2);
+ break;
+ case 3:
+ SET_XMM(val, xmm3);
+ break;
+ case 4:
+ SET_XMM(val, xmm4);
+ break;
+ case 5:
+ SET_XMM(val, xmm5);
+ break;
+ case 6:
+ SET_XMM(val, xmm6);
+ break;
+ case 7:
+ SET_XMM(val, xmm7);
+ break;
+ }
+}
+
+typedef unsigned long v1di __attribute__ ((vector_size (8)));
+static inline unsigned long get_xmm(int n)
+{
+ assert(n >= 0 && n <= 7);
+
+ register v1di xmm0 __asm__("%xmm0");
+ register v1di xmm1 __asm__("%xmm1");
+ register v1di xmm2 __asm__("%xmm2");
+ register v1di xmm3 __asm__("%xmm3");
+ register v1di xmm4 __asm__("%xmm4");
+ register v1di xmm5 __asm__("%xmm5");
+ register v1di xmm6 __asm__("%xmm6");
+ register v1di xmm7 __asm__("%xmm7");
+ switch (n) {
+ case 0:
+ return (unsigned long)xmm0;
+ case 1:
+ return (unsigned long)xmm1;
+ case 2:
+ return (unsigned long)xmm2;
+ case 3:
+ return (unsigned long)xmm3;
+ case 4:
+ return (unsigned long)xmm4;
+ case 5:
+ return (unsigned long)xmm5;
+ case 6:
+ return (unsigned long)xmm6;
+ case 7:
+ return (unsigned long)xmm7;
+ }
+ return 0;
+}
+
+/*
+ * Basic CPU control in CR0
+ */
+#define X86_CR0_PE (1UL<<0) /* Protection Enable */
+#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */
+#define X86_CR0_EM (1UL<<2) /* Emulation */
+#define X86_CR0_TS (1UL<<3) /* Task Switched */
+#define X86_CR0_ET (1UL<<4) /* Extension Type */
+#define X86_CR0_NE (1UL<<5) /* Numeric Error */
+#define X86_CR0_WP (1UL<<16) /* Write Protect */
+#define X86_CR0_AM (1UL<<18) /* Alignment Mask */
+#define X86_CR0_NW (1UL<<29) /* Not Write-through */
+#define X86_CR0_CD (1UL<<30) /* Cache Disable */
+#define X86_CR0_PG (1UL<<31) /* Paging */
+
+/*
+ * CPU model specific register (MSR) numbers.
+ */
+
+/* x86-64 specific MSRs */
+#define MSR_EFER 0xc0000080 /* extended feature register */
+#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */
+#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */
+#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */
+#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */
+#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */
+#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */
+#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */
+#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */
+
+/* EFER bits: */
+#define EFER_SCE (1<<0) /* SYSCALL/SYSRET */
+#define EFER_LME (1<<8) /* Long mode enable */
+#define EFER_LMA (1<<10) /* Long mode active (read-only) */
+#define EFER_NX (1<<11) /* No execute enable */
+#define EFER_SVME (1<<12) /* Enable virtualization */
+#define EFER_LMSLE (1<<13) /* Long Mode Segment Limit Enable */
+#define EFER_FFXSR (1<<14) /* Enable Fast FXSAVE/FXRSTOR */
+
+/* Intel MSRs. Some also available on other CPUs */
+
+#define MSR_PPIN_CTL 0x0000004e
+#define MSR_PPIN 0x0000004f
+
+#define MSR_IA32_PERFCTR0 0x000000c1
+#define MSR_IA32_PERFCTR1 0x000000c2
+#define MSR_FSB_FREQ 0x000000cd
+#define MSR_PLATFORM_INFO 0x000000ce
+#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
+#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
+
+#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
+#define NHM_C3_AUTO_DEMOTE (1UL << 25)
+#define NHM_C1_AUTO_DEMOTE (1UL << 26)
+#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
+#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
+#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
+
+#define MSR_MTRRcap 0x000000fe
+#define MSR_IA32_BBL_CR_CTL 0x00000119
+#define MSR_IA32_BBL_CR_CTL3 0x0000011e
+
+#define MSR_IA32_SYSENTER_CS 0x00000174
+#define MSR_IA32_SYSENTER_ESP 0x00000175
+#define MSR_IA32_SYSENTER_EIP 0x00000176
+
+#define MSR_IA32_MCG_CAP 0x00000179
+#define MSR_IA32_MCG_STATUS 0x0000017a
+#define MSR_IA32_MCG_CTL 0x0000017b
+#define MSR_IA32_MCG_EXT_CTL 0x000004d0
+
+#define MSR_OFFCORE_RSP_0 0x000001a6
+#define MSR_OFFCORE_RSP_1 0x000001a7
+#define MSR_TURBO_RATIO_LIMIT 0x000001ad
+#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
+#define MSR_TURBO_RATIO_LIMIT2 0x000001af
+
+#define MSR_LBR_SELECT 0x000001c8
+#define MSR_LBR_TOS 0x000001c9
+#define MSR_LBR_NHM_FROM 0x00000680
+#define MSR_LBR_NHM_TO 0x000006c0
+#define MSR_LBR_CORE_FROM 0x00000040
+#define MSR_LBR_CORE_TO 0x00000060
+
+#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */
+#define LBR_INFO_MISPRED BIT_ULL(63)
+#define LBR_INFO_IN_TX BIT_ULL(62)
+#define LBR_INFO_ABORT BIT_ULL(61)
+#define LBR_INFO_CYCLES 0xffff
+
+#define MSR_IA32_PEBS_ENABLE 0x000003f1
+#define MSR_IA32_DS_AREA 0x00000600
+#define MSR_IA32_PERF_CAPABILITIES 0x00000345
+#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
+
+#define MSR_IA32_RTIT_CTL 0x00000570
+#define MSR_IA32_RTIT_STATUS 0x00000571
+#define MSR_IA32_RTIT_ADDR0_A 0x00000580
+#define MSR_IA32_RTIT_ADDR0_B 0x00000581
+#define MSR_IA32_RTIT_ADDR1_A 0x00000582
+#define MSR_IA32_RTIT_ADDR1_B 0x00000583
+#define MSR_IA32_RTIT_ADDR2_A 0x00000584
+#define MSR_IA32_RTIT_ADDR2_B 0x00000585
+#define MSR_IA32_RTIT_ADDR3_A 0x00000586
+#define MSR_IA32_RTIT_ADDR3_B 0x00000587
+#define MSR_IA32_RTIT_CR3_MATCH 0x00000572
+#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560
+#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561
+
+#define MSR_MTRRfix64K_00000 0x00000250
+#define MSR_MTRRfix16K_80000 0x00000258
+#define MSR_MTRRfix16K_A0000 0x00000259
+#define MSR_MTRRfix4K_C0000 0x00000268
+#define MSR_MTRRfix4K_C8000 0x00000269
+#define MSR_MTRRfix4K_D0000 0x0000026a
+#define MSR_MTRRfix4K_D8000 0x0000026b
+#define MSR_MTRRfix4K_E0000 0x0000026c
+#define MSR_MTRRfix4K_E8000 0x0000026d
+#define MSR_MTRRfix4K_F0000 0x0000026e
+#define MSR_MTRRfix4K_F8000 0x0000026f
+#define MSR_MTRRdefType 0x000002ff
+
+#define MSR_IA32_CR_PAT 0x00000277
+
+#define MSR_IA32_DEBUGCTLMSR 0x000001d9
+#define MSR_IA32_LASTBRANCHFROMIP 0x000001db
+#define MSR_IA32_LASTBRANCHTOIP 0x000001dc
+#define MSR_IA32_LASTINTFROMIP 0x000001dd
+#define MSR_IA32_LASTINTTOIP 0x000001de
+
+/* DEBUGCTLMSR bits (others vary by model): */
+#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
+#define DEBUGCTLMSR_BTF_SHIFT 1
+#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
+#define DEBUGCTLMSR_TR (1UL << 6)
+#define DEBUGCTLMSR_BTS (1UL << 7)
+#define DEBUGCTLMSR_BTINT (1UL << 8)
+#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9)
+#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
+#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
+#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
+#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
+
+#define MSR_PEBS_FRONTEND 0x000003f7
+
+#define MSR_IA32_POWER_CTL 0x000001fc
+
+#define MSR_IA32_MC0_CTL 0x00000400
+#define MSR_IA32_MC0_STATUS 0x00000401
+#define MSR_IA32_MC0_ADDR 0x00000402
+#define MSR_IA32_MC0_MISC 0x00000403
+
+/* C-state Residency Counters */
+#define MSR_PKG_C3_RESIDENCY 0x000003f8
+#define MSR_PKG_C6_RESIDENCY 0x000003f9
+#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa
+#define MSR_PKG_C7_RESIDENCY 0x000003fa
+#define MSR_CORE_C3_RESIDENCY 0x000003fc
+#define MSR_CORE_C6_RESIDENCY 0x000003fd
+#define MSR_CORE_C7_RESIDENCY 0x000003fe
+#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff
+#define MSR_PKG_C2_RESIDENCY 0x0000060d
+#define MSR_PKG_C8_RESIDENCY 0x00000630
+#define MSR_PKG_C9_RESIDENCY 0x00000631
+#define MSR_PKG_C10_RESIDENCY 0x00000632
+
+/* Interrupt Response Limit */
+#define MSR_PKGC3_IRTL 0x0000060a
+#define MSR_PKGC6_IRTL 0x0000060b
+#define MSR_PKGC7_IRTL 0x0000060c
+#define MSR_PKGC8_IRTL 0x00000633
+#define MSR_PKGC9_IRTL 0x00000634
+#define MSR_PKGC10_IRTL 0x00000635
+
+/* Run Time Average Power Limiting (RAPL) Interface */
+
+#define MSR_RAPL_POWER_UNIT 0x00000606
+
+#define MSR_PKG_POWER_LIMIT 0x00000610
+#define MSR_PKG_ENERGY_STATUS 0x00000611
+#define MSR_PKG_PERF_STATUS 0x00000613
+#define MSR_PKG_POWER_INFO 0x00000614
+
+#define MSR_DRAM_POWER_LIMIT 0x00000618
+#define MSR_DRAM_ENERGY_STATUS 0x00000619
+#define MSR_DRAM_PERF_STATUS 0x0000061b
+#define MSR_DRAM_POWER_INFO 0x0000061c
+
+#define MSR_PP0_POWER_LIMIT 0x00000638
+#define MSR_PP0_ENERGY_STATUS 0x00000639
+#define MSR_PP0_POLICY 0x0000063a
+#define MSR_PP0_PERF_STATUS 0x0000063b
+
+#define MSR_PP1_POWER_LIMIT 0x00000640
+#define MSR_PP1_ENERGY_STATUS 0x00000641
+#define MSR_PP1_POLICY 0x00000642
+
+/* Config TDP MSRs */
+#define MSR_CONFIG_TDP_NOMINAL 0x00000648
+#define MSR_CONFIG_TDP_LEVEL_1 0x00000649
+#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A
+#define MSR_CONFIG_TDP_CONTROL 0x0000064B
+#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C
+
+#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D
+
+#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
+#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
+#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
+#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B
+
+#define MSR_CORE_C1_RES 0x00000660
+#define MSR_MODULE_C6_RES_MS 0x00000664
+
+#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668
+#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669
+
+#define MSR_ATOM_CORE_RATIOS 0x0000066a
+#define MSR_ATOM_CORE_VIDS 0x0000066b
+#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c
+#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d
+
+
+#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690
+#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0
+#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1
+
+/* Hardware P state interface */
+#define MSR_PPERF 0x0000064e
+#define MSR_PERF_LIMIT_REASONS 0x0000064f
+#define MSR_PM_ENABLE 0x00000770
+#define MSR_HWP_CAPABILITIES 0x00000771
+#define MSR_HWP_REQUEST_PKG 0x00000772
+#define MSR_HWP_INTERRUPT 0x00000773
+#define MSR_HWP_REQUEST 0x00000774
+#define MSR_HWP_STATUS 0x00000777
+
+/* CPUID.6.EAX */
+#define HWP_BASE_BIT (1<<7)
+#define HWP_NOTIFICATIONS_BIT (1<<8)
+#define HWP_ACTIVITY_WINDOW_BIT (1<<9)
+#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10)
+#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11)
+
+/* IA32_HWP_CAPABILITIES */
+#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff)
+#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff)
+#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff)
+#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff)
+
+/* IA32_HWP_REQUEST */
+#define HWP_MIN_PERF(x) (x & 0xff)
+#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
+#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16)
+#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24)
+#define HWP_EPP_PERFORMANCE 0x00
+#define HWP_EPP_BALANCE_PERFORMANCE 0x80
+#define HWP_EPP_BALANCE_POWERSAVE 0xC0
+#define HWP_EPP_POWERSAVE 0xFF
+#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32)
+#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42)
+
+/* IA32_HWP_STATUS */
+#define HWP_GUARANTEED_CHANGE(x) (x & 0x1)
+#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4)
+
+/* IA32_HWP_INTERRUPT */
+#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1)
+#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2)
+
+#define MSR_AMD64_MC0_MASK 0xc0010044
+
+#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
+#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x))
+#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x))
+#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x))
+
+#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x))
+
+/* These are consecutive and not in the normal 4er MCE bank block */
+#define MSR_IA32_MC0_CTL2 0x00000280
+#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x))
+
+#define MSR_P6_PERFCTR0 0x000000c1
+#define MSR_P6_PERFCTR1 0x000000c2
+#define MSR_P6_EVNTSEL0 0x00000186
+#define MSR_P6_EVNTSEL1 0x00000187
+
+#define MSR_KNC_PERFCTR0 0x00000020
+#define MSR_KNC_PERFCTR1 0x00000021
+#define MSR_KNC_EVNTSEL0 0x00000028
+#define MSR_KNC_EVNTSEL1 0x00000029
+
+/* Alternative perfctr range with full access. */
+#define MSR_IA32_PMC0 0x000004c1
+
+/* AMD64 MSRs. Not complete. See the architecture manual for a more
+ complete list. */
+
+#define MSR_AMD64_PATCH_LEVEL 0x0000008b
+#define MSR_AMD64_TSC_RATIO 0xc0000104
+#define MSR_AMD64_NB_CFG 0xc001001f
+#define MSR_AMD64_PATCH_LOADER 0xc0010020
+#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
+#define MSR_AMD64_OSVW_STATUS 0xc0010141
+#define MSR_AMD64_LS_CFG 0xc0011020
+#define MSR_AMD64_DC_CFG 0xc0011022
+#define MSR_AMD64_BU_CFG2 0xc001102a
+#define MSR_AMD64_IBSFETCHCTL 0xc0011030
+#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
+#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
+#define MSR_AMD64_IBSFETCH_REG_COUNT 3
+#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
+#define MSR_AMD64_IBSOPCTL 0xc0011033
+#define MSR_AMD64_IBSOPRIP 0xc0011034
+#define MSR_AMD64_IBSOPDATA 0xc0011035
+#define MSR_AMD64_IBSOPDATA2 0xc0011036
+#define MSR_AMD64_IBSOPDATA3 0xc0011037
+#define MSR_AMD64_IBSDCLINAD 0xc0011038
+#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
+#define MSR_AMD64_IBSOP_REG_COUNT 7
+#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
+#define MSR_AMD64_IBSCTL 0xc001103a
+#define MSR_AMD64_IBSBRTARGET 0xc001103b
+#define MSR_AMD64_IBSOPDATA4 0xc001103d
+#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_SEV 0xc0010131
+#define MSR_AMD64_SEV_ENABLED_BIT 0
+#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
+
+/* Fam 17h MSRs */
+#define MSR_F17H_IRPERF 0xc00000e9
+
+/* Fam 16h MSRs */
+#define MSR_F16H_L2I_PERF_CTL 0xc0010230
+#define MSR_F16H_L2I_PERF_CTR 0xc0010231
+#define MSR_F16H_DR1_ADDR_MASK 0xc0011019
+#define MSR_F16H_DR2_ADDR_MASK 0xc001101a
+#define MSR_F16H_DR3_ADDR_MASK 0xc001101b
+#define MSR_F16H_DR0_ADDR_MASK 0xc0011027
+
+/* Fam 15h MSRs */
+#define MSR_F15H_PERF_CTL 0xc0010200
+#define MSR_F15H_PERF_CTR 0xc0010201
+#define MSR_F15H_NB_PERF_CTL 0xc0010240
+#define MSR_F15H_NB_PERF_CTR 0xc0010241
+#define MSR_F15H_PTSC 0xc0010280
+#define MSR_F15H_IC_CFG 0xc0011021
+
+/* Fam 10h MSRs */
+#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
+#define FAM10H_MMIO_CONF_ENABLE (1<<0)
+#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
+#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
+#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
+#define FAM10H_MMIO_CONF_BASE_SHIFT 20
+#define MSR_FAM10H_NODE_ID 0xc001100c
+#define MSR_F10H_DECFG 0xc0011029
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
+
+/* K8 MSRs */
+#define MSR_K8_TOP_MEM1 0xc001001a
+#define MSR_K8_TOP_MEM2 0xc001001d
+#define MSR_K8_SYSCFG 0xc0010010
+#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23
+#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_K8_INT_PENDING_MSG 0xc0010055
+/* C1E active bits in int pending message */
+#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
+#define MSR_K8_TSEG_ADDR 0xc0010112
+#define MSR_K8_TSEG_MASK 0xc0010113
+#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
+#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
+#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
+
+/* K7 MSRs */
+#define MSR_K7_EVNTSEL0 0xc0010000
+#define MSR_K7_PERFCTR0 0xc0010004
+#define MSR_K7_EVNTSEL1 0xc0010001
+#define MSR_K7_PERFCTR1 0xc0010005
+#define MSR_K7_EVNTSEL2 0xc0010002
+#define MSR_K7_PERFCTR2 0xc0010006
+#define MSR_K7_EVNTSEL3 0xc0010003
+#define MSR_K7_PERFCTR3 0xc0010007
+#define MSR_K7_CLK_CTL 0xc001001b
+#define MSR_K7_HWCR 0xc0010015
+#define MSR_K7_HWCR_SMMLOCK_BIT 0
+#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
+#define MSR_K7_FID_VID_CTL 0xc0010041
+#define MSR_K7_FID_VID_STATUS 0xc0010042
+
+/* K6 MSRs */
+#define MSR_K6_WHCR 0xc0000082
+#define MSR_K6_UWCCR 0xc0000085
+#define MSR_K6_EPMR 0xc0000086
+#define MSR_K6_PSOR 0xc0000087
+#define MSR_K6_PFIR 0xc0000088
+
+/* Centaur-Hauls/IDT defined MSRs. */
+#define MSR_IDT_FCR1 0x00000107
+#define MSR_IDT_FCR2 0x00000108
+#define MSR_IDT_FCR3 0x00000109
+#define MSR_IDT_FCR4 0x0000010a
+
+#define MSR_IDT_MCR0 0x00000110
+#define MSR_IDT_MCR1 0x00000111
+#define MSR_IDT_MCR2 0x00000112
+#define MSR_IDT_MCR3 0x00000113
+#define MSR_IDT_MCR4 0x00000114
+#define MSR_IDT_MCR5 0x00000115
+#define MSR_IDT_MCR6 0x00000116
+#define MSR_IDT_MCR7 0x00000117
+#define MSR_IDT_MCR_CTRL 0x00000120
+
+/* VIA Cyrix defined MSRs*/
+#define MSR_VIA_FCR 0x00001107
+#define MSR_VIA_LONGHAUL 0x0000110a
+#define MSR_VIA_RNG 0x0000110b
+#define MSR_VIA_BCR2 0x00001147
+
+/* Transmeta defined MSRs */
+#define MSR_TMTA_LONGRUN_CTRL 0x80868010
+#define MSR_TMTA_LONGRUN_FLAGS 0x80868011
+#define MSR_TMTA_LRTI_READOUT 0x80868018
+#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a
+
+/* Intel defined MSRs. */
+#define MSR_IA32_P5_MC_ADDR 0x00000000
+#define MSR_IA32_P5_MC_TYPE 0x00000001
+#define MSR_IA32_TSC 0x00000010
+#define MSR_IA32_PLATFORM_ID 0x00000017
+#define MSR_IA32_EBL_CR_POWERON 0x0000002a
+#define MSR_EBC_FREQUENCY_ID 0x0000002c
+#define MSR_SMI_COUNT 0x00000034
+#define MSR_IA32_FEATURE_CONTROL 0x0000003a
+#define MSR_IA32_TSC_ADJUST 0x0000003b
+#define MSR_IA32_BNDCFGS 0x00000d90
+
+#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
+
+#define MSR_IA32_XSS 0x00000da0
+
+#define FEATURE_CONTROL_LOCKED (1<<0)
+#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
+#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
+#define FEATURE_CONTROL_LMCE (1<<20)
+
+#define MSR_IA32_APICBASE 0x0000001b
+#define MSR_IA32_APICBASE_BSP (1<<8)
+#define MSR_IA32_APICBASE_ENABLE (1<<11)
+#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+
+#define MSR_IA32_TSCDEADLINE 0x000006e0
+
+#define MSR_IA32_UCODE_WRITE 0x00000079
+#define MSR_IA32_UCODE_REV 0x0000008b
+
+#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b
+#define MSR_IA32_SMBASE 0x0000009e
+
+#define MSR_IA32_PERF_STATUS 0x00000198
+#define MSR_IA32_PERF_CTL 0x00000199
+#define INTEL_PERF_CTL_MASK 0xffff
+#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
+#define MSR_AMD_PERF_STATUS 0xc0010063
+#define MSR_AMD_PERF_CTL 0xc0010062
+
+#define MSR_IA32_MPERF 0x000000e7
+#define MSR_IA32_APERF 0x000000e8
+
+#define MSR_IA32_THERM_CONTROL 0x0000019a
+#define MSR_IA32_THERM_INTERRUPT 0x0000019b
+
+#define THERM_INT_HIGH_ENABLE (1 << 0)
+#define THERM_INT_LOW_ENABLE (1 << 1)
+#define THERM_INT_PLN_ENABLE (1 << 24)
+
+#define MSR_IA32_THERM_STATUS 0x0000019c
+
+#define THERM_STATUS_PROCHOT (1 << 0)
+#define THERM_STATUS_POWER_LIMIT (1 << 10)
+
+#define MSR_THERM2_CTL 0x0000019d
+
+#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16)
+
+#define MSR_IA32_MISC_ENABLE 0x000001a0
+
+#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2
+
+#define MSR_MISC_FEATURE_CONTROL 0x000001a4
+#define MSR_MISC_PWR_MGMT 0x000001aa
+
+#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
+#define ENERGY_PERF_BIAS_PERFORMANCE 0
+#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4
+#define ENERGY_PERF_BIAS_NORMAL 6
+#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8
+#define ENERGY_PERF_BIAS_POWERSAVE 15
+
+#define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1
+
+#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0)
+#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10)
+
+#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2
+
+#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0)
+#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1)
+#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24)
+
+/* Thermal Thresholds Support */
+#define THERM_INT_THRESHOLD0_ENABLE (1 << 15)
+#define THERM_SHIFT_THRESHOLD0 8
+#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0)
+#define THERM_INT_THRESHOLD1_ENABLE (1 << 23)
+#define THERM_SHIFT_THRESHOLD1 16
+#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1)
+#define THERM_STATUS_THRESHOLD0 (1 << 6)
+#define THERM_LOG_THRESHOLD0 (1 << 7)
+#define THERM_STATUS_THRESHOLD1 (1 << 8)
+#define THERM_LOG_THRESHOLD1 (1 << 9)
+
+/* MISC_ENABLE bits: architectural */
+#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0
+#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT)
+#define MSR_IA32_MISC_ENABLE_TCC_BIT 1
+#define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT)
+#define MSR_IA32_MISC_ENABLE_EMON_BIT 7
+#define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT)
+#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11
+#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT)
+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12
+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT)
+#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16
+#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT)
+#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18
+#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT)
+#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22
+#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT)
+#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23
+#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34
+#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT)
+
+/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */
+#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2
+#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT)
+#define MSR_IA32_MISC_ENABLE_TM1_BIT 3
+#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT)
+#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4
+#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6
+#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8
+#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT)
+#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9
+#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_FERR_BIT 10
+#define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT)
+#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10
+#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT)
+#define MSR_IA32_MISC_ENABLE_TM2_BIT 13
+#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT)
+#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19
+#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20
+#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT)
+#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24
+#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT)
+#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37
+#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38
+#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
+#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
+
+/* MISC_FEATURES_ENABLES non-architectural features */
+#define MSR_MISC_FEATURES_ENABLES 0x00000140
+
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
+#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1
+
+#define MSR_IA32_TSC_DEADLINE 0x000006E0
+
+/* P4/Xeon+ specific */
+#define MSR_IA32_MCG_EAX 0x00000180
+#define MSR_IA32_MCG_EBX 0x00000181
+#define MSR_IA32_MCG_ECX 0x00000182
+#define MSR_IA32_MCG_EDX 0x00000183
+#define MSR_IA32_MCG_ESI 0x00000184
+#define MSR_IA32_MCG_EDI 0x00000185
+#define MSR_IA32_MCG_EBP 0x00000186
+#define MSR_IA32_MCG_ESP 0x00000187
+#define MSR_IA32_MCG_EFLAGS 0x00000188
+#define MSR_IA32_MCG_EIP 0x00000189
+#define MSR_IA32_MCG_RESERVED 0x0000018a
+
+/* Pentium IV performance counter MSRs */
+#define MSR_P4_BPU_PERFCTR0 0x00000300
+#define MSR_P4_BPU_PERFCTR1 0x00000301
+#define MSR_P4_BPU_PERFCTR2 0x00000302
+#define MSR_P4_BPU_PERFCTR3 0x00000303
+#define MSR_P4_MS_PERFCTR0 0x00000304
+#define MSR_P4_MS_PERFCTR1 0x00000305
+#define MSR_P4_MS_PERFCTR2 0x00000306
+#define MSR_P4_MS_PERFCTR3 0x00000307
+#define MSR_P4_FLAME_PERFCTR0 0x00000308
+#define MSR_P4_FLAME_PERFCTR1 0x00000309
+#define MSR_P4_FLAME_PERFCTR2 0x0000030a
+#define MSR_P4_FLAME_PERFCTR3 0x0000030b
+#define MSR_P4_IQ_PERFCTR0 0x0000030c
+#define MSR_P4_IQ_PERFCTR1 0x0000030d
+#define MSR_P4_IQ_PERFCTR2 0x0000030e
+#define MSR_P4_IQ_PERFCTR3 0x0000030f
+#define MSR_P4_IQ_PERFCTR4 0x00000310
+#define MSR_P4_IQ_PERFCTR5 0x00000311
+#define MSR_P4_BPU_CCCR0 0x00000360
+#define MSR_P4_BPU_CCCR1 0x00000361
+#define MSR_P4_BPU_CCCR2 0x00000362
+#define MSR_P4_BPU_CCCR3 0x00000363
+#define MSR_P4_MS_CCCR0 0x00000364
+#define MSR_P4_MS_CCCR1 0x00000365
+#define MSR_P4_MS_CCCR2 0x00000366
+#define MSR_P4_MS_CCCR3 0x00000367
+#define MSR_P4_FLAME_CCCR0 0x00000368
+#define MSR_P4_FLAME_CCCR1 0x00000369
+#define MSR_P4_FLAME_CCCR2 0x0000036a
+#define MSR_P4_FLAME_CCCR3 0x0000036b
+#define MSR_P4_IQ_CCCR0 0x0000036c
+#define MSR_P4_IQ_CCCR1 0x0000036d
+#define MSR_P4_IQ_CCCR2 0x0000036e
+#define MSR_P4_IQ_CCCR3 0x0000036f
+#define MSR_P4_IQ_CCCR4 0x00000370
+#define MSR_P4_IQ_CCCR5 0x00000371
+#define MSR_P4_ALF_ESCR0 0x000003ca
+#define MSR_P4_ALF_ESCR1 0x000003cb
+#define MSR_P4_BPU_ESCR0 0x000003b2
+#define MSR_P4_BPU_ESCR1 0x000003b3
+#define MSR_P4_BSU_ESCR0 0x000003a0
+#define MSR_P4_BSU_ESCR1 0x000003a1
+#define MSR_P4_CRU_ESCR0 0x000003b8
+#define MSR_P4_CRU_ESCR1 0x000003b9
+#define MSR_P4_CRU_ESCR2 0x000003cc
+#define MSR_P4_CRU_ESCR3 0x000003cd
+#define MSR_P4_CRU_ESCR4 0x000003e0
+#define MSR_P4_CRU_ESCR5 0x000003e1
+#define MSR_P4_DAC_ESCR0 0x000003a8
+#define MSR_P4_DAC_ESCR1 0x000003a9
+#define MSR_P4_FIRM_ESCR0 0x000003a4
+#define MSR_P4_FIRM_ESCR1 0x000003a5
+#define MSR_P4_FLAME_ESCR0 0x000003a6
+#define MSR_P4_FLAME_ESCR1 0x000003a7
+#define MSR_P4_FSB_ESCR0 0x000003a2
+#define MSR_P4_FSB_ESCR1 0x000003a3
+#define MSR_P4_IQ_ESCR0 0x000003ba
+#define MSR_P4_IQ_ESCR1 0x000003bb
+#define MSR_P4_IS_ESCR0 0x000003b4
+#define MSR_P4_IS_ESCR1 0x000003b5
+#define MSR_P4_ITLB_ESCR0 0x000003b6
+#define MSR_P4_ITLB_ESCR1 0x000003b7
+#define MSR_P4_IX_ESCR0 0x000003c8
+#define MSR_P4_IX_ESCR1 0x000003c9
+#define MSR_P4_MOB_ESCR0 0x000003aa
+#define MSR_P4_MOB_ESCR1 0x000003ab
+#define MSR_P4_MS_ESCR0 0x000003c0
+#define MSR_P4_MS_ESCR1 0x000003c1
+#define MSR_P4_PMH_ESCR0 0x000003ac
+#define MSR_P4_PMH_ESCR1 0x000003ad
+#define MSR_P4_RAT_ESCR0 0x000003bc
+#define MSR_P4_RAT_ESCR1 0x000003bd
+#define MSR_P4_SAAT_ESCR0 0x000003ae
+#define MSR_P4_SAAT_ESCR1 0x000003af
+#define MSR_P4_SSU_ESCR0 0x000003be
+#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */
+
+#define MSR_P4_TBPU_ESCR0 0x000003c2
+#define MSR_P4_TBPU_ESCR1 0x000003c3
+#define MSR_P4_TC_ESCR0 0x000003c4
+#define MSR_P4_TC_ESCR1 0x000003c5
+#define MSR_P4_U2L_ESCR0 0x000003b0
+#define MSR_P4_U2L_ESCR1 0x000003b1
+
+#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2
+
+/* Intel Core-based CPU performance counters */
+#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
+#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
+#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b
+#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d
+#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e
+#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
+
+/* Geode defined MSRs */
+#define MSR_GEODE_BUSCONT_CONF0 0x00001900
+
+/* Intel VT MSRs */
+#define MSR_IA32_VMX_BASIC 0x00000480
+#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481
+#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482
+#define MSR_IA32_VMX_EXIT_CTLS 0x00000483
+#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484
+#define MSR_IA32_VMX_MISC 0x00000485
+#define MSR_IA32_VMX_CR0_FIXED0 0x00000486
+#define MSR_IA32_VMX_CR0_FIXED1 0x00000487
+#define MSR_IA32_VMX_CR4_FIXED0 0x00000488
+#define MSR_IA32_VMX_CR4_FIXED1 0x00000489
+#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a
+#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
+#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
+#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d
+#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
+#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f
+#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490
+#define MSR_IA32_VMX_VMFUNC 0x00000491
+
+/* VMX_BASIC bits and bitmasks */
+#define VMX_BASIC_VMCS_SIZE_SHIFT 32
+#define VMX_BASIC_TRUE_CTLS (1ULL << 55)
+#define VMX_BASIC_64 0x0001000000000000LLU
+#define VMX_BASIC_MEM_TYPE_SHIFT 50
+#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
+#define VMX_BASIC_MEM_TYPE_WB 6LLU
+#define VMX_BASIC_INOUT 0x0040000000000000LLU
+
+/* MSR_IA32_VMX_MISC bits */
+#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
+#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
+/* AMD-V MSRs */
+
+#define MSR_VM_CR 0xc0010114
+#define MSR_VM_IGNNE 0xc0010115
+#define MSR_VM_HSAVE_PA 0xc0010117
+
+#endif /* !SELFTEST_KVM_X86_H */
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
new file mode 100644
index 000000000000..c9f5b7d4ce38
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -0,0 +1,87 @@
+/*
+ * tools/testing/selftests/kvm/lib/assert.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/
+
+#include "test_util.h"
+
+#include <execinfo.h>
+#include <sys/syscall.h>
+
+/* Dumps the current stack trace to stderr. */
+static void __attribute__((noinline)) test_dump_stack(void);
+static void test_dump_stack(void)
+{
+ /*
+ * Build and run this command:
+ *
+ * addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \
+ * grep -v test_dump_stack | cat -n 1>&2
+ *
+ * Note that the spacing is different and there's no newline.
+ */
+ size_t i;
+ size_t n = 20;
+ void *stack[n];
+ const char *addr2line = "addr2line -s -e /proc/$PPID/exe -fpai";
+ const char *pipeline = "|cat -n 1>&2";
+ char cmd[strlen(addr2line) + strlen(pipeline) +
+ /* N bytes per addr * 2 digits per byte + 1 space per addr: */
+ n * (((sizeof(void *)) * 2) + 1) +
+ /* Null terminator: */
+ 1];
+ char *c;
+
+ n = backtrace(stack, n);
+ c = &cmd[0];
+ c += sprintf(c, "%s", addr2line);
+ /*
+ * Skip the first 3 frames: backtrace, test_dump_stack, and
+ * test_assert. We hope that backtrace isn't inlined and the other two
+ * we've declared noinline.
+ */
+ for (i = 2; i < n; i++)
+ c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1);
+ c += sprintf(c, "%s", pipeline);
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-result"
+ system(cmd);
+#pragma GCC diagnostic pop
+}
+
+static pid_t gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+
+void __attribute__((noinline))
+test_assert(bool exp, const char *exp_str,
+ const char *file, unsigned int line, const char *fmt, ...)
+{
+ va_list ap;
+
+ if (!(exp)) {
+ va_start(ap, fmt);
+
+ fprintf(stderr, "==== Test Assertion Failure ====\n"
+ " %s:%u: %s\n"
+ " pid=%d tid=%d\n",
+ file, line, exp_str, getpid(), gettid());
+ test_dump_stack();
+ if (fmt) {
+ fputs(" ", stderr);
+ vfprintf(stderr, fmt, ap);
+ fputs("\n", stderr);
+ }
+ va_end(ap);
+
+ exit(254);
+ }
+
+ return;
+}
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
new file mode 100644
index 000000000000..5eb857584aa3
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -0,0 +1,197 @@
+/*
+ * tools/testing/selftests/kvm/lib/elf.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+
+#include <bits/endian.h>
+#include <linux/elf.h>
+
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+
+static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
+{
+ off_t offset_rv;
+
+ /* Open the ELF file. */
+ int fd;
+ fd = open(filename, O_RDONLY);
+ TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n"
+ " filename: %s\n"
+ " rv: %i errno: %i", filename, fd, errno);
+
+ /* Read in and validate ELF Identification Record.
+ * The ELF Identification record is the first 16 (EI_NIDENT) bytes
+ * of the ELF header, which is at the beginning of the ELF file.
+ * For now it is only safe to read the first EI_NIDENT bytes. Once
+ * read and validated, the value of e_ehsize can be used to determine
+ * the real size of the ELF header.
+ */
+ unsigned char ident[EI_NIDENT];
+ test_read(fd, ident, sizeof(ident));
+ TEST_ASSERT((ident[EI_MAG0] == ELFMAG0) && (ident[EI_MAG1] == ELFMAG1)
+ && (ident[EI_MAG2] == ELFMAG2) && (ident[EI_MAG3] == ELFMAG3),
+ "ELF MAGIC Mismatch,\n"
+ " filename: %s\n"
+ " ident[EI_MAG0 - EI_MAG3]: %02x %02x %02x %02x\n"
+ " Expected: %02x %02x %02x %02x",
+ filename,
+ ident[EI_MAG0], ident[EI_MAG1], ident[EI_MAG2], ident[EI_MAG3],
+ ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3);
+ TEST_ASSERT(ident[EI_CLASS] == ELFCLASS64,
+ "Current implementation only able to handle ELFCLASS64,\n"
+ " filename: %s\n"
+ " ident[EI_CLASS]: %02x\n"
+ " expected: %02x",
+ filename,
+ ident[EI_CLASS], ELFCLASS64);
+ TEST_ASSERT(((BYTE_ORDER == LITTLE_ENDIAN)
+ && (ident[EI_DATA] == ELFDATA2LSB))
+ || ((BYTE_ORDER == BIG_ENDIAN)
+ && (ident[EI_DATA] == ELFDATA2MSB)), "Current "
+ "implementation only able to handle\n"
+ "cases where the host and ELF file endianness\n"
+ "is the same:\n"
+ " host BYTE_ORDER: %u\n"
+ " host LITTLE_ENDIAN: %u\n"
+ " host BIG_ENDIAN: %u\n"
+ " ident[EI_DATA]: %u\n"
+ " ELFDATA2LSB: %u\n"
+ " ELFDATA2MSB: %u",
+ BYTE_ORDER, LITTLE_ENDIAN, BIG_ENDIAN,
+ ident[EI_DATA], ELFDATA2LSB, ELFDATA2MSB);
+ TEST_ASSERT(ident[EI_VERSION] == EV_CURRENT,
+ "Current implementation only able to handle current "
+ "ELF version,\n"
+ " filename: %s\n"
+ " ident[EI_VERSION]: %02x\n"
+ " expected: %02x",
+ filename, ident[EI_VERSION], EV_CURRENT);
+
+ /* Read in the ELF header.
+ * With the ELF Identification portion of the ELF header
+ * validated, especially that the value at EI_VERSION is
+ * as expected, it is now safe to read the entire ELF header.
+ */
+ offset_rv = lseek(fd, 0, SEEK_SET);
+ TEST_ASSERT(offset_rv == 0, "Seek to ELF header failed,\n"
+ " rv: %zi expected: %i", offset_rv, 0);
+ test_read(fd, hdrp, sizeof(*hdrp));
+ TEST_ASSERT(hdrp->e_phentsize == sizeof(Elf64_Phdr),
+ "Unexpected physical header size,\n"
+ " hdrp->e_phentsize: %x\n"
+ " expected: %zx",
+ hdrp->e_phentsize, sizeof(Elf64_Phdr));
+ TEST_ASSERT(hdrp->e_shentsize == sizeof(Elf64_Shdr),
+ "Unexpected section header size,\n"
+ " hdrp->e_shentsize: %x\n"
+ " expected: %zx",
+ hdrp->e_shentsize, sizeof(Elf64_Shdr));
+}
+
+/* VM ELF Load
+ *
+ * Input Args:
+ * filename - Path to ELF file
+ *
+ * Output Args: None
+ *
+ * Input/Output Args:
+ * vm - Pointer to opaque type that describes the VM.
+ *
+ * Return: None, TEST_ASSERT failures for all error conditions
+ *
+ * Loads the program image of the ELF file specified by filename,
+ * into the virtual address space of the VM pointed to by vm. On entry
+ * the VM needs to not be using any of the virtual address space used
+ * by the image and it needs to have sufficient available physical pages, to
+ * back the virtual pages used to load the image.
+ */
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
+ uint32_t data_memslot, uint32_t pgd_memslot)
+{
+ off_t offset, offset_rv;
+ Elf64_Ehdr hdr;
+
+ /* Open the ELF file. */
+ int fd;
+ fd = open(filename, O_RDONLY);
+ TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n"
+ " filename: %s\n"
+ " rv: %i errno: %i", filename, fd, errno);
+
+ /* Read in the ELF header. */
+ elfhdr_get(filename, &hdr);
+
+ /* For each program header.
+ * The following ELF header members specify the location
+ * and size of the program headers:
+ *
+ * e_phoff - File offset to start of program headers
+ * e_phentsize - Size of each program header
+ * e_phnum - Number of program header entries
+ */
+ for (unsigned int n1 = 0; n1 < hdr.e_phnum; n1++) {
+ /* Seek to the beginning of the program header. */
+ offset = hdr.e_phoff + (n1 * hdr.e_phentsize);
+ offset_rv = lseek(fd, offset, SEEK_SET);
+ TEST_ASSERT(offset_rv == offset,
+ "Failed to seek to begining of program header %u,\n"
+ " filename: %s\n"
+ " rv: %jd errno: %i",
+ n1, filename, (intmax_t) offset_rv, errno);
+
+ /* Read in the program header. */
+ Elf64_Phdr phdr;
+ test_read(fd, &phdr, sizeof(phdr));
+
+ /* Skip if this header doesn't describe a loadable segment. */
+ if (phdr.p_type != PT_LOAD)
+ continue;
+
+ /* Allocate memory for this segment within the VM. */
+ TEST_ASSERT(phdr.p_memsz > 0, "Unexpected loadable segment "
+ "memsize of 0,\n"
+ " phdr index: %u p_memsz: 0x%" PRIx64,
+ n1, (uint64_t) phdr.p_memsz);
+ vm_vaddr_t seg_vstart = phdr.p_vaddr;
+ seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1);
+ vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1;
+ seg_vend |= vm->page_size - 1;
+ size_t seg_size = seg_vend - seg_vstart + 1;
+
+ vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart,
+ data_memslot, pgd_memslot);
+ TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
+ "virtual memory for segment at requested min addr,\n"
+ " segment idx: %u\n"
+ " seg_vstart: 0x%lx\n"
+ " vaddr: 0x%lx",
+ n1, seg_vstart, vaddr);
+ memset(addr_gva2hva(vm, vaddr), 0, seg_size);
+ /* TODO(lhuemill): Set permissions of each memory segment
+ * based on the least-significant 3 bits of phdr.p_flags.
+ */
+
+ /* Load portion of initial state that is contained within
+ * the ELF file.
+ */
+ if (phdr.p_filesz) {
+ offset_rv = lseek(fd, phdr.p_offset, SEEK_SET);
+ TEST_ASSERT(offset_rv == phdr.p_offset,
+ "Seek to program segment offset failed,\n"
+ " program header idx: %u errno: %i\n"
+ " offset_rv: 0x%jx\n"
+ " expected: 0x%jx\n",
+ n1, errno, (intmax_t) offset_rv,
+ (intmax_t) phdr.p_offset);
+ test_read(fd, addr_gva2hva(vm, phdr.p_vaddr),
+ phdr.p_filesz);
+ }
+ }
+}
diff --git a/tools/testing/selftests/kvm/lib/io.c b/tools/testing/selftests/kvm/lib/io.c
new file mode 100644
index 000000000000..cff869ffe6ee
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/io.c
@@ -0,0 +1,158 @@
+/*
+ * tools/testing/selftests/kvm/lib/io.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+
+/* Test Write
+ *
+ * A wrapper for write(2), that automatically handles the following
+ * special conditions:
+ *
+ * + Interrupted system call (EINTR)
+ * + Write of less than requested amount
+ * + Non-block return (EAGAIN)
+ *
+ * For each of the above, an additional write is performed to automatically
+ * continue writing the requested data.
+ * There are also many cases where write(2) can return an unexpected
+ * error (e.g. EIO). Such errors cause a TEST_ASSERT failure.
+ *
+ * Note, for function signature compatibility with write(2), this function
+ * returns the number of bytes written, but that value will always be equal
+ * to the number of requested bytes. All other conditions in this and
+ * future enhancements to this function either automatically issue another
+ * write(2) or cause a TEST_ASSERT failure.
+ *
+ * Args:
+ * fd - Opened file descriptor to file to be written.
+ * count - Number of bytes to write.
+ *
+ * Output:
+ * buf - Starting address of data to be written.
+ *
+ * Return:
+ * On success, number of bytes written.
+ * On failure, a TEST_ASSERT failure is caused.
+ */
+ssize_t test_write(int fd, const void *buf, size_t count)
+{
+ ssize_t rc;
+ ssize_t num_written = 0;
+ size_t num_left = count;
+ const char *ptr = buf;
+
+ /* Note: Count of zero is allowed (see "RETURN VALUE" portion of
+ * write(2) manpage for details.
+ */
+ TEST_ASSERT(count >= 0, "Unexpected count, count: %li", count);
+
+ do {
+ rc = write(fd, ptr, num_left);
+
+ switch (rc) {
+ case -1:
+ TEST_ASSERT(errno == EAGAIN || errno == EINTR,
+ "Unexpected write failure,\n"
+ " rc: %zi errno: %i", rc, errno);
+ continue;
+
+ case 0:
+ TEST_ASSERT(false, "Unexpected EOF,\n"
+ " rc: %zi num_written: %zi num_left: %zu",
+ rc, num_written, num_left);
+ break;
+
+ default:
+ TEST_ASSERT(rc >= 0, "Unexpected ret from write,\n"
+ " rc: %zi errno: %i", rc, errno);
+ num_written += rc;
+ num_left -= rc;
+ ptr += rc;
+ break;
+ }
+ } while (num_written < count);
+
+ return num_written;
+}
+
+/* Test Read
+ *
+ * A wrapper for read(2), that automatically handles the following
+ * special conditions:
+ *
+ * + Interrupted system call (EINTR)
+ * + Read of less than requested amount
+ * + Non-block return (EAGAIN)
+ *
+ * For each of the above, an additional read is performed to automatically
+ * continue reading the requested data.
+ * There are also many cases where read(2) can return an unexpected
+ * error (e.g. EIO). Such errors cause a TEST_ASSERT failure. Note,
+ * it is expected that the file opened by fd at the current file position
+ * contains at least the number of requested bytes to be read. A TEST_ASSERT
+ * failure is produced if an End-Of-File condition occurs, before all the
+ * data is read. It is the callers responsibility to assure that sufficient
+ * data exists.
+ *
+ * Note, for function signature compatibility with read(2), this function
+ * returns the number of bytes read, but that value will always be equal
+ * to the number of requested bytes. All other conditions in this and
+ * future enhancements to this function either automatically issue another
+ * read(2) or cause a TEST_ASSERT failure.
+ *
+ * Args:
+ * fd - Opened file descriptor to file to be read.
+ * count - Number of bytes to read.
+ *
+ * Output:
+ * buf - Starting address of where to write the bytes read.
+ *
+ * Return:
+ * On success, number of bytes read.
+ * On failure, a TEST_ASSERT failure is caused.
+ */
+ssize_t test_read(int fd, void *buf, size_t count)
+{
+ ssize_t rc;
+ ssize_t num_read = 0;
+ size_t num_left = count;
+ char *ptr = buf;
+
+ /* Note: Count of zero is allowed (see "If count is zero" portion of
+ * read(2) manpage for details.
+ */
+ TEST_ASSERT(count >= 0, "Unexpected count, count: %li", count);
+
+ do {
+ rc = read(fd, ptr, num_left);
+
+ switch (rc) {
+ case -1:
+ TEST_ASSERT(errno == EAGAIN || errno == EINTR,
+ "Unexpected read failure,\n"
+ " rc: %zi errno: %i", rc, errno);
+ break;
+
+ case 0:
+ TEST_ASSERT(false, "Unexpected EOF,\n"
+ " rc: %zi num_read: %zi num_left: %zu",
+ rc, num_read, num_left);
+ break;
+
+ default:
+ TEST_ASSERT(rc > 0, "Unexpected ret from read,\n"
+ " rc: %zi errno: %i", rc, errno);
+ num_read += rc;
+ num_left -= rc;
+ ptr += rc;
+ break;
+ }
+ } while (num_read < count);
+
+ return num_read;
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
new file mode 100644
index 000000000000..2cedfda181d4
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -0,0 +1,1486 @@
+/*
+ * tools/testing/selftests/kvm/lib/kvm_util.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+
+#include <assert.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#define KVM_DEV_PATH "/dev/kvm"
+
+#define KVM_UTIL_PGS_PER_HUGEPG 512
+#define KVM_UTIL_MIN_PADDR 0x2000
+
+/* Aligns x up to the next multiple of size. Size must be a power of 2. */
+static void *align(void *x, size_t size)
+{
+ size_t mask = size - 1;
+ TEST_ASSERT(size != 0 && !(size & (size - 1)),
+ "size not a power of 2: %lu", size);
+ return (void *) (((size_t) x + mask) & ~mask);
+}
+
+/* Capability
+ *
+ * Input Args:
+ * cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return:
+ * On success, the Value corresponding to the capability (KVM_CAP_*)
+ * specified by the value of cap. On failure a TEST_ASSERT failure
+ * is produced.
+ *
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
+ */
+int kvm_check_cap(long cap)
+{
+ int ret;
+ int kvm_fd;
+
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
+ KVM_DEV_PATH, kvm_fd, errno);
+
+ ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
+ TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
+ " rc: %i errno: %i", ret, errno);
+
+ close(kvm_fd);
+
+ return ret;
+}
+
+/* VM Create
+ *
+ * Input Args:
+ * mode - VM Mode (e.g. VM_MODE_FLAT48PG)
+ * phy_pages - Physical memory pages
+ * perm - permission
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ *
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG).
+ * When phy_pages is non-zero, a memory region of phy_pages physical pages
+ * is created and mapped starting at guest physical address 0. The file
+ * descriptor to control the created VM is created with the permissions
+ * given by perm (e.g. O_RDWR).
+ */
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+{
+ struct kvm_vm *vm;
+ int kvm_fd;
+
+ /* Allocate memory. */
+ vm = calloc(1, sizeof(*vm));
+ TEST_ASSERT(vm != NULL, "Insufficent Memory");
+
+ vm->mode = mode;
+ kvm_fd = open(KVM_DEV_PATH, perm);
+ TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
+ KVM_DEV_PATH, kvm_fd, errno);
+
+ /* Create VM. */
+ vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL);
+ TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
+ "rc: %i errno: %i", vm->fd, errno);
+
+ close(kvm_fd);
+
+ /* Setup mode specific traits. */
+ switch (vm->mode) {
+ case VM_MODE_FLAT48PG:
+ vm->page_size = 0x1000;
+ vm->page_shift = 12;
+
+ /* Limit to 48-bit canonical virtual addresses. */
+ vm->vpages_valid = sparsebit_alloc();
+ sparsebit_set_num(vm->vpages_valid,
+ 0, (1ULL << (48 - 1)) >> vm->page_shift);
+ sparsebit_set_num(vm->vpages_valid,
+ (~((1ULL << (48 - 1)) - 1)) >> vm->page_shift,
+ (1ULL << (48 - 1)) >> vm->page_shift);
+
+ /* Limit physical addresses to 52-bits. */
+ vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1;
+ break;
+
+ default:
+ TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
+ }
+
+ /* Allocate and setup memory for guest. */
+ vm->vpages_mapped = sparsebit_alloc();
+ if (phy_pages != 0)
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ 0, 0, phy_pages, 0);
+
+ return vm;
+}
+
+/* Userspace Memory Region Find
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * start - Starting VM physical address
+ * end - Ending VM physical address, inclusive.
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to overlapping region, NULL if no such region.
+ *
+ * Searches for a region with any physical memory that overlaps with
+ * any portion of the guest physical addresses from start to end
+ * inclusive. If multiple overlapping regions exist, a pointer to any
+ * of the regions is returned. Null is returned only when no overlapping
+ * region exists.
+ */
+static struct userspace_mem_region *userspace_mem_region_find(
+ struct kvm_vm *vm, uint64_t start, uint64_t end)
+{
+ struct userspace_mem_region *region;
+
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ uint64_t existing_start = region->region.guest_phys_addr;
+ uint64_t existing_end = region->region.guest_phys_addr
+ + region->region.memory_size - 1;
+ if (start <= existing_end && end >= existing_start)
+ return region;
+ }
+
+ return NULL;
+}
+
+/* KVM Userspace Memory Region Find
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * start - Starting VM physical address
+ * end - Ending VM physical address, inclusive.
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to overlapping region, NULL if no such region.
+ *
+ * Public interface to userspace_mem_region_find. Allows tests to look up
+ * the memslot datastructure for a given range of guest physical memory.
+ */
+struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
+ uint64_t end)
+{
+ struct userspace_mem_region *region;
+
+ region = userspace_mem_region_find(vm, start, end);
+ if (!region)
+ return NULL;
+
+ return &region->region;
+}
+
+/* VCPU Find
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to VCPU structure
+ *
+ * Locates a vcpu structure that describes the VCPU specified by vcpuid and
+ * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU
+ * for the specified vcpuid.
+ */
+struct vcpu *vcpu_find(struct kvm_vm *vm,
+ uint32_t vcpuid)
+{
+ struct vcpu *vcpup;
+
+ for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) {
+ if (vcpup->id == vcpuid)
+ return vcpup;
+ }
+
+ return NULL;
+}
+
+/* VM VCPU Remove
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None, TEST_ASSERT failures for all error conditions
+ *
+ * Within the VM specified by vm, removes the VCPU given by vcpuid.
+ */
+static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+ int ret = close(vcpu->fd);
+ TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
+ "errno: %i", ret, errno);
+
+ if (vcpu->next)
+ vcpu->next->prev = vcpu->prev;
+ if (vcpu->prev)
+ vcpu->prev->next = vcpu->next;
+ else
+ vm->vcpu_head = vcpu->next;
+ free(vcpu);
+}
+
+
+/* Destroys and frees the VM pointed to by vmp.
+ */
+void kvm_vm_free(struct kvm_vm *vmp)
+{
+ int ret;
+
+ if (vmp == NULL)
+ return;
+
+ /* Free userspace_mem_regions. */
+ while (vmp->userspace_mem_region_head) {
+ struct userspace_mem_region *region
+ = vmp->userspace_mem_region_head;
+
+ region->region.memory_size = 0;
+ ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION,
+ &region->region);
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+
+ vmp->userspace_mem_region_head = region->next;
+ sparsebit_free(&region->unused_phy_pages);
+ ret = munmap(region->mmap_start, region->mmap_size);
+ TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i",
+ ret, errno);
+
+ free(region);
+ }
+
+ /* Free VCPUs. */
+ while (vmp->vcpu_head)
+ vm_vcpu_rm(vmp, vmp->vcpu_head->id);
+
+ /* Free sparsebit arrays. */
+ sparsebit_free(&vmp->vpages_valid);
+ sparsebit_free(&vmp->vpages_mapped);
+
+ /* Close file descriptor for the VM. */
+ ret = close(vmp->fd);
+ TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
+ " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
+
+ /* Free the structure describing the VM. */
+ free(vmp);
+}
+
+/* Memory Compare, host virtual to guest virtual
+ *
+ * Input Args:
+ * hva - Starting host virtual address
+ * vm - Virtual Machine
+ * gva - Starting guest virtual address
+ * len - number of bytes to compare
+ *
+ * Output Args: None
+ *
+ * Input/Output Args: None
+ *
+ * Return:
+ * Returns 0 if the bytes starting at hva for a length of len
+ * are equal the guest virtual bytes starting at gva. Returns
+ * a value < 0, if bytes at hva are less than those at gva.
+ * Otherwise a value > 0 is returned.
+ *
+ * Compares the bytes starting at the host virtual address hva, for
+ * a length of len, to the guest bytes starting at the guest virtual
+ * address given by gva.
+ */
+int kvm_memcmp_hva_gva(void *hva,
+ struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
+{
+ size_t amt;
+
+ /* Compare a batch of bytes until either a match is found
+ * or all the bytes have been compared.
+ */
+ for (uintptr_t offset = 0; offset < len; offset += amt) {
+ uintptr_t ptr1 = (uintptr_t)hva + offset;
+
+ /* Determine host address for guest virtual address
+ * at offset.
+ */
+ uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
+
+ /* Determine amount to compare on this pass.
+ * Don't allow the comparsion to cross a page boundary.
+ */
+ amt = len - offset;
+ if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
+ amt = vm->page_size - (ptr1 % vm->page_size);
+ if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
+ amt = vm->page_size - (ptr2 % vm->page_size);
+
+ assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
+ assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
+
+ /* Perform the comparison. If there is a difference
+ * return that result to the caller, otherwise need
+ * to continue on looking for a mismatch.
+ */
+ int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
+ if (ret != 0)
+ return ret;
+ }
+
+ /* No mismatch found. Let the caller know the two memory
+ * areas are equal.
+ */
+ return 0;
+}
+
+/* Allocate an instance of struct kvm_cpuid2
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the allocated struct. The caller is responsible
+ * for freeing this struct.
+ *
+ * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
+ * array to be decided at allocation time, allocation is slightly
+ * complicated. This function uses a reasonable default length for
+ * the array and performs the appropriate allocation.
+ */
+static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
+{
+ struct kvm_cpuid2 *cpuid;
+ int nent = 100;
+ size_t size;
+
+ size = sizeof(*cpuid);
+ size += nent * sizeof(struct kvm_cpuid_entry2);
+ cpuid = malloc(size);
+ if (!cpuid) {
+ perror("malloc");
+ abort();
+ }
+
+ cpuid->nent = nent;
+
+ return cpuid;
+}
+
+/* KVM Supported CPUID Get
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ *
+ * Return: The supported KVM CPUID
+ *
+ * Get the guest CPUID supported by KVM.
+ */
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
+{
+ static struct kvm_cpuid2 *cpuid;
+ int ret;
+ int kvm_fd;
+
+ if (cpuid)
+ return cpuid;
+
+ cpuid = allocate_kvm_cpuid2();
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
+ KVM_DEV_PATH, kvm_fd, errno);
+
+ ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
+ TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
+ ret, errno);
+
+ close(kvm_fd);
+ return cpuid;
+}
+
+/* Locate a cpuid entry.
+ *
+ * Input Args:
+ * cpuid: The cpuid.
+ * function: The function of the cpuid entry to find.
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the cpuid entry. Never returns NULL.
+ */
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
+{
+ struct kvm_cpuid2 *cpuid;
+ struct kvm_cpuid_entry2 *entry = NULL;
+ int i;
+
+ cpuid = kvm_get_supported_cpuid();
+ for (i = 0; i < cpuid->nent; i++) {
+ if (cpuid->entries[i].function == function &&
+ cpuid->entries[i].index == index) {
+ entry = &cpuid->entries[i];
+ break;
+ }
+ }
+
+ TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
+ function, index);
+ return entry;
+}
+
+/* VM Userspace Memory Region Add
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * backing_src - Storage source for this region.
+ * NULL to use anonymous memory.
+ * guest_paddr - Starting guest physical address
+ * slot - KVM region slot
+ * npages - Number of physical pages
+ * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Allocates a memory area of the number of pages specified by npages
+ * and maps it to the VM specified by vm, at a starting physical address
+ * given by guest_paddr. The region is created with a KVM region slot
+ * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The
+ * region is created with the flags given by flags.
+ */
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags)
+{
+ int ret;
+ unsigned long pmem_size = 0;
+ struct userspace_mem_region *region;
+ size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
+
+ TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
+ "address not on a page boundary.\n"
+ " guest_paddr: 0x%lx vm->page_size: 0x%x",
+ guest_paddr, vm->page_size);
+ TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
+ <= vm->max_gfn, "Physical range beyond maximum "
+ "supported physical address,\n"
+ " guest_paddr: 0x%lx npages: 0x%lx\n"
+ " vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ guest_paddr, npages, vm->max_gfn, vm->page_size);
+
+ /* Confirm a mem region with an overlapping address doesn't
+ * already exist.
+ */
+ region = (struct userspace_mem_region *) userspace_mem_region_find(
+ vm, guest_paddr, guest_paddr + npages * vm->page_size);
+ if (region != NULL)
+ TEST_ASSERT(false, "overlapping userspace_mem_region already "
+ "exists\n"
+ " requested guest_paddr: 0x%lx npages: 0x%lx "
+ "page_size: 0x%x\n"
+ " existing guest_paddr: 0x%lx size: 0x%lx",
+ guest_paddr, npages, vm->page_size,
+ (uint64_t) region->region.guest_phys_addr,
+ (uint64_t) region->region.memory_size);
+
+ /* Confirm no region with the requested slot already exists. */
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ if (region->region.slot == slot)
+ break;
+ if ((guest_paddr <= (region->region.guest_phys_addr
+ + region->region.memory_size))
+ && ((guest_paddr + npages * vm->page_size)
+ >= region->region.guest_phys_addr))
+ break;
+ }
+ if (region != NULL)
+ TEST_ASSERT(false, "A mem region with the requested slot "
+ "or overlapping physical memory range already exists.\n"
+ " requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
+ " existing slot: %u paddr: 0x%lx size: 0x%lx",
+ slot, guest_paddr, npages,
+ region->region.slot,
+ (uint64_t) region->region.guest_phys_addr,
+ (uint64_t) region->region.memory_size);
+
+ /* Allocate and initialize new mem region structure. */
+ region = calloc(1, sizeof(*region));
+ TEST_ASSERT(region != NULL, "Insufficient Memory");
+ region->mmap_size = npages * vm->page_size;
+
+ /* Enough memory to align up to a huge page. */
+ if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
+ region->mmap_size += huge_page_size;
+ region->mmap_start = mmap(NULL, region->mmap_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS
+ | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0),
+ -1, 0);
+ TEST_ASSERT(region->mmap_start != MAP_FAILED,
+ "test_malloc failed, mmap_start: %p errno: %i",
+ region->mmap_start, errno);
+
+ /* Align THP allocation up to start of a huge page. */
+ region->host_mem = align(region->mmap_start,
+ src_type == VM_MEM_SRC_ANONYMOUS_THP ? huge_page_size : 1);
+
+ /* As needed perform madvise */
+ if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
+ ret = madvise(region->host_mem, npages * vm->page_size,
+ src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
+ TEST_ASSERT(ret == 0, "madvise failed,\n"
+ " addr: %p\n"
+ " length: 0x%lx\n"
+ " src_type: %x",
+ region->host_mem, npages * vm->page_size, src_type);
+ }
+
+ region->unused_phy_pages = sparsebit_alloc();
+ sparsebit_set_num(region->unused_phy_pages,
+ guest_paddr >> vm->page_shift, npages);
+ region->region.slot = slot;
+ region->region.flags = flags;
+ region->region.guest_phys_addr = guest_paddr;
+ region->region.memory_size = npages * vm->page_size;
+ region->region.userspace_addr = (uintptr_t) region->host_mem;
+ ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ " rc: %i errno: %i\n"
+ " slot: %u flags: 0x%x\n"
+ " guest_phys_addr: 0x%lx size: 0x%lx",
+ ret, errno, slot, flags,
+ guest_paddr, (uint64_t) region->region.memory_size);
+
+ /* Add to linked-list of memory regions. */
+ if (vm->userspace_mem_region_head)
+ vm->userspace_mem_region_head->prev = region;
+ region->next = vm->userspace_mem_region_head;
+ vm->userspace_mem_region_head = region;
+}
+
+/* Memslot to region
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * memslot - KVM memory slot ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to memory region structure that describe memory region
+ * using kvm memory slot ID given by memslot. TEST_ASSERT failure
+ * on error (e.g. currently no memory region using memslot as a KVM
+ * memory slot ID).
+ */
+static struct userspace_mem_region *memslot2region(struct kvm_vm *vm,
+ uint32_t memslot)
+{
+ struct userspace_mem_region *region;
+
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ if (region->region.slot == memslot)
+ break;
+ }
+ if (region == NULL) {
+ fprintf(stderr, "No mem region with the requested slot found,\n"
+ " requested slot: %u\n", memslot);
+ fputs("---- vm dump ----\n", stderr);
+ vm_dump(stderr, vm, 2);
+ TEST_ASSERT(false, "Mem region not found");
+ }
+
+ return region;
+}
+
+/* VM Memory Region Flags Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * flags - Starting guest physical address
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the flags of the memory region specified by the value of slot,
+ * to the values given by flags.
+ */
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
+{
+ int ret;
+ struct userspace_mem_region *region;
+
+ /* Locate memory region. */
+ region = memslot2region(vm, slot);
+
+ region->region.flags = flags;
+
+ ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ " rc: %i errno: %i slot: %u flags: 0x%x",
+ ret, errno, slot, flags);
+}
+
+/* VCPU mmap Size
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Size of VCPU state
+ *
+ * Returns the size of the structure pointed to by the return value
+ * of vcpu_state().
+ */
+static int vcpu_mmap_sz(void)
+{
+ int dev_fd, ret;
+
+ dev_fd = open(KVM_DEV_PATH, O_RDONLY);
+ TEST_ASSERT(dev_fd >= 0, "%s open %s failed, rc: %i errno: %i",
+ __func__, KVM_DEV_PATH, dev_fd, errno);
+
+ ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
+ TEST_ASSERT(ret >= sizeof(struct kvm_run),
+ "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
+ __func__, ret, errno);
+
+ close(dev_fd);
+
+ return ret;
+}
+
+/* VM VCPU Add
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Creates and adds to the VM specified by vm and virtual CPU with
+ * the ID given by vcpuid.
+ */
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu;
+
+ /* Confirm a vcpu with the specified id doesn't already exist. */
+ vcpu = vcpu_find(vm, vcpuid);
+ if (vcpu != NULL)
+ TEST_ASSERT(false, "vcpu with the specified id "
+ "already exists,\n"
+ " requested vcpuid: %u\n"
+ " existing vcpuid: %u state: %p",
+ vcpuid, vcpu->id, vcpu->state);
+
+ /* Allocate and initialize new vcpu structure. */
+ vcpu = calloc(1, sizeof(*vcpu));
+ TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
+ vcpu->id = vcpuid;
+ vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
+ TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
+ vcpu->fd, errno);
+
+ TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
+ "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
+ vcpu_mmap_sz(), sizeof(*vcpu->state));
+ vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
+ PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
+ TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
+ "vcpu id: %u errno: %i", vcpuid, errno);
+
+ /* Add to linked-list of VCPUs. */
+ if (vm->vcpu_head)
+ vm->vcpu_head->prev = vcpu;
+ vcpu->next = vm->vcpu_head;
+ vm->vcpu_head = vcpu;
+
+ vcpu_setup(vm, vcpuid);
+}
+
+/* VM Virtual Address Unused Gap
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * sz - Size (bytes)
+ * vaddr_min - Minimum Virtual Address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Lowest virtual address at or below vaddr_min, with at least
+ * sz unused bytes. TEST_ASSERT failure if no area of at least
+ * size sz is available.
+ *
+ * Within the VM specified by vm, locates the lowest starting virtual
+ * address >= vaddr_min, that has at least sz unallocated bytes. A
+ * TEST_ASSERT failure occurs for invalid input or no area of at least
+ * sz unallocated bytes >= vaddr_min is available.
+ */
+static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
+ vm_vaddr_t vaddr_min)
+{
+ uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
+
+ /* Determine lowest permitted virtual page index. */
+ uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
+ if ((pgidx_start * vm->page_size) < vaddr_min)
+ goto no_va_found;
+
+ /* Loop over section with enough valid virtual page indexes. */
+ if (!sparsebit_is_set_num(vm->vpages_valid,
+ pgidx_start, pages))
+ pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
+ pgidx_start, pages);
+ do {
+ /*
+ * Are there enough unused virtual pages available at
+ * the currently proposed starting virtual page index.
+ * If not, adjust proposed starting index to next
+ * possible.
+ */
+ if (sparsebit_is_clear_num(vm->vpages_mapped,
+ pgidx_start, pages))
+ goto va_found;
+ pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
+ pgidx_start, pages);
+ if (pgidx_start == 0)
+ goto no_va_found;
+
+ /*
+ * If needed, adjust proposed starting virtual address,
+ * to next range of valid virtual addresses.
+ */
+ if (!sparsebit_is_set_num(vm->vpages_valid,
+ pgidx_start, pages)) {
+ pgidx_start = sparsebit_next_set_num(
+ vm->vpages_valid, pgidx_start, pages);
+ if (pgidx_start == 0)
+ goto no_va_found;
+ }
+ } while (pgidx_start != 0);
+
+no_va_found:
+ TEST_ASSERT(false, "No vaddr of specified pages available, "
+ "pages: 0x%lx", pages);
+
+ /* NOT REACHED */
+ return -1;
+
+va_found:
+ TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
+ pgidx_start, pages),
+ "Unexpected, invalid virtual page index range,\n"
+ " pgidx_start: 0x%lx\n"
+ " pages: 0x%lx",
+ pgidx_start, pages);
+ TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
+ pgidx_start, pages),
+ "Unexpected, pages already mapped,\n"
+ " pgidx_start: 0x%lx\n"
+ " pages: 0x%lx",
+ pgidx_start, pages);
+
+ return pgidx_start * vm->page_size;
+}
+
+/* VM Virtual Address Allocate
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * sz - Size in bytes
+ * vaddr_min - Minimum starting virtual address
+ * data_memslot - Memory region slot for data pages
+ * pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting guest virtual address
+ *
+ * Allocates at least sz bytes within the virtual address space of the vm
+ * given by vm. The allocated bytes are mapped to a virtual address >=
+ * the address given by vaddr_min. Note that each allocation uses a
+ * a unique set of pages, with the minimum real allocation being at least
+ * a page.
+ */
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+ uint32_t data_memslot, uint32_t pgd_memslot)
+{
+ uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
+
+ virt_pgd_alloc(vm, pgd_memslot);
+
+ /* Find an unused range of virtual page addresses of at least
+ * pages in length.
+ */
+ vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
+
+ /* Map the virtual pages. */
+ for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
+ pages--, vaddr += vm->page_size) {
+ vm_paddr_t paddr;
+
+ paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot);
+
+ virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+
+ sparsebit_set(vm->vpages_mapped,
+ vaddr >> vm->page_shift);
+ }
+
+ return vaddr_start;
+}
+
+/* Address VM Physical to Host Virtual
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gpa - VM physical address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent host virtual address
+ *
+ * Locates the memory region containing the VM physical address given
+ * by gpa, within the VM given by vm. When found, the host virtual
+ * address providing the memory to the vm physical address is returned.
+ * A TEST_ASSERT failure occurs if no region containing gpa exists.
+ */
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+ struct userspace_mem_region *region;
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ if ((gpa >= region->region.guest_phys_addr)
+ && (gpa <= (region->region.guest_phys_addr
+ + region->region.memory_size - 1)))
+ return (void *) ((uintptr_t) region->host_mem
+ + (gpa - region->region.guest_phys_addr));
+ }
+
+ TEST_ASSERT(false, "No vm physical memory at 0x%lx", gpa);
+ return NULL;
+}
+
+/* Address Host Virtual to VM Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * hva - Host virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Locates the memory region containing the host virtual address given
+ * by hva, within the VM given by vm. When found, the equivalent
+ * VM physical address is returned. A TEST_ASSERT failure occurs if no
+ * region containing hva exists.
+ */
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
+{
+ struct userspace_mem_region *region;
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ if ((hva >= region->host_mem)
+ && (hva <= (region->host_mem
+ + region->region.memory_size - 1)))
+ return (vm_paddr_t) ((uintptr_t)
+ region->region.guest_phys_addr
+ + (hva - (uintptr_t) region->host_mem));
+ }
+
+ TEST_ASSERT(false, "No mapping to a guest physical address, "
+ "hva: %p", hva);
+ return -1;
+}
+
+/* VM Create IRQ Chip
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Creates an interrupt controller chip for the VM specified by vm.
+ */
+void vm_create_irqchip(struct kvm_vm *vm)
+{
+ int ret;
+
+ ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
+ TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+}
+
+/* VM VCPU State
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to structure that describes the state of the VCPU.
+ *
+ * Locates and returns a pointer to a structure that describes the
+ * state of the VCPU with the given vcpuid.
+ */
+struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ return vcpu->state;
+}
+
+/* VM VCPU Run
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Switch to executing the code for the VCPU given by vcpuid, within the VM
+ * given by vm.
+ */
+void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ int ret = _vcpu_run(vm, vcpuid);
+ TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+}
+
+int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int rc;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ do {
+ rc = ioctl(vcpu->fd, KVM_RUN, NULL);
+ } while (rc == -1 && errno == EINTR);
+ return rc;
+}
+
+/* VM VCPU Set MP State
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * mp_state - mp_state to be set
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the MP state of the VCPU given by vcpuid, to the state given
+ * by mp_state.
+ */
+void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_mp_state *mp_state)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
+ TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+}
+
+/* VM VCPU Regs Get
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args:
+ * regs - current state of VCPU regs
+ *
+ * Return: None
+ *
+ * Obtains the current register state for the VCPU specified by vcpuid
+ * and stores it at the location given by regs.
+ */
+void vcpu_regs_get(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_regs *regs)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Get the regs. */
+ ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
+ TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+/* VM VCPU Regs Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * regs - Values to set VCPU regs to
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the regs of the VCPU specified by vcpuid to the values
+ * given by regs.
+ */
+void vcpu_regs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_regs *regs)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Set the regs. */
+ ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
+ TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Get the regs. */
+ ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
+ TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Set the regs. */
+ ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
+ TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+/* VM VCPU Args Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * num - number of arguments
+ * ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first num function input arguments to the values
+ * given as variable args. Each of the variable args is expected to
+ * be of type uint64_t.
+ */
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+ va_list ap;
+ struct kvm_regs regs;
+
+ TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
+ " num: %u\n",
+ num);
+
+ va_start(ap, num);
+ vcpu_regs_get(vm, vcpuid, &regs);
+
+ if (num >= 1)
+ regs.rdi = va_arg(ap, uint64_t);
+
+ if (num >= 2)
+ regs.rsi = va_arg(ap, uint64_t);
+
+ if (num >= 3)
+ regs.rdx = va_arg(ap, uint64_t);
+
+ if (num >= 4)
+ regs.rcx = va_arg(ap, uint64_t);
+
+ if (num >= 5)
+ regs.r8 = va_arg(ap, uint64_t);
+
+ if (num >= 6)
+ regs.r9 = va_arg(ap, uint64_t);
+
+ vcpu_regs_set(vm, vcpuid, &regs);
+ va_end(ap);
+}
+
+/* VM VCPU System Regs Get
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args:
+ * sregs - current state of VCPU system regs
+ *
+ * Return: None
+ *
+ * Obtains the current system register state for the VCPU specified by
+ * vcpuid and stores it at the location given by sregs.
+ */
+void vcpu_sregs_get(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Get the regs. */
+ /* Get the regs. */
+ ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
+ TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+/* VM VCPU System Regs Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * sregs - Values to set VCPU system regs to
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the system regs of the VCPU specified by vcpuid to the values
+ * given by sregs.
+ */
+void vcpu_sregs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+ int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
+ TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+}
+
+int _vcpu_sregs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Get the regs. */
+ return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
+}
+
+/* VCPU Ioctl
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * cmd - Ioctl number
+ * arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a VCPU fd.
+ */
+void vcpu_ioctl(struct kvm_vm *vm,
+ uint32_t vcpuid, unsigned long cmd, void *arg)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ ret = ioctl(vcpu->fd, cmd, arg);
+ TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
+ cmd, ret, errno, strerror(errno));
+}
+
+/* VM Ioctl
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * cmd - Ioctl number
+ * arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a VM fd.
+ */
+void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+{
+ int ret;
+
+ ret = ioctl(vm->fd, cmd, arg);
+ TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
+ cmd, ret, errno, strerror(errno));
+}
+
+/* VM Dump
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * indent - Left margin indent amount
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VM given by vm, to the FILE stream
+ * given by stream.
+ */
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ struct userspace_mem_region *region;
+ struct vcpu *vcpu;
+
+ fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
+ fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
+ fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
+ fprintf(stream, "%*sMem Regions:\n", indent, "");
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
+ "host_virt: %p\n", indent + 2, "",
+ (uint64_t) region->region.guest_phys_addr,
+ (uint64_t) region->region.memory_size,
+ region->host_mem);
+ fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
+ sparsebit_dump(stream, region->unused_phy_pages, 0);
+ }
+ fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
+ sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
+ fprintf(stream, "%*spgd_created: %u\n", indent, "",
+ vm->pgd_created);
+ if (vm->pgd_created) {
+ fprintf(stream, "%*sVirtual Translation Tables:\n",
+ indent + 2, "");
+ virt_dump(stream, vm, indent + 4);
+ }
+ fprintf(stream, "%*sVCPUs:\n", indent, "");
+ for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next)
+ vcpu_dump(stream, vm, vcpu->id, indent + 2);
+}
+
+/* VM VCPU Dump
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * indent - Left margin indent amount
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VCPU specified by vcpuid, within the VM
+ * given by vm, to the FILE stream given by stream.
+ */
+void vcpu_dump(FILE *stream, struct kvm_vm *vm,
+ uint32_t vcpuid, uint8_t indent)
+{
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+
+ fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
+
+ fprintf(stream, "%*sregs:\n", indent + 2, "");
+ vcpu_regs_get(vm, vcpuid, &regs);
+ regs_dump(stream, &regs, indent + 4);
+
+ fprintf(stream, "%*ssregs:\n", indent + 2, "");
+ vcpu_sregs_get(vm, vcpuid, &sregs);
+ sregs_dump(stream, &sregs, indent + 4);
+}
+
+/* Known KVM exit reasons */
+static struct exit_reason {
+ unsigned int reason;
+ const char *name;
+} exit_reasons_known[] = {
+ {KVM_EXIT_UNKNOWN, "UNKNOWN"},
+ {KVM_EXIT_EXCEPTION, "EXCEPTION"},
+ {KVM_EXIT_IO, "IO"},
+ {KVM_EXIT_HYPERCALL, "HYPERCALL"},
+ {KVM_EXIT_DEBUG, "DEBUG"},
+ {KVM_EXIT_HLT, "HLT"},
+ {KVM_EXIT_MMIO, "MMIO"},
+ {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
+ {KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
+ {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
+ {KVM_EXIT_INTR, "INTR"},
+ {KVM_EXIT_SET_TPR, "SET_TPR"},
+ {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
+ {KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
+ {KVM_EXIT_S390_RESET, "S390_RESET"},
+ {KVM_EXIT_DCR, "DCR"},
+ {KVM_EXIT_NMI, "NMI"},
+ {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
+ {KVM_EXIT_OSI, "OSI"},
+ {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
+#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
+ {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
+#endif
+};
+
+/* Exit Reason String
+ *
+ * Input Args:
+ * exit_reason - Exit reason
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Constant string pointer describing the exit reason.
+ *
+ * Locates and returns a constant string that describes the KVM exit
+ * reason given by exit_reason. If no such string is found, a constant
+ * string of "Unknown" is returned.
+ */
+const char *exit_reason_str(unsigned int exit_reason)
+{
+ unsigned int n1;
+
+ for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
+ if (exit_reason == exit_reasons_known[n1].reason)
+ return exit_reasons_known[n1].name;
+ }
+
+ return "Unknown";
+}
+
+/* Physical Page Allocate
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * paddr_min - Physical address minimum
+ * memslot - Memory region to allocate page from
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting physical address
+ *
+ * Within the VM specified by vm, locates an available physical page
+ * at or above paddr_min. If found, the page is marked as in use
+ * and its address is returned. A TEST_ASSERT failure occurs if no
+ * page is available at or above paddr_min.
+ */
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
+ vm_paddr_t paddr_min, uint32_t memslot)
+{
+ struct userspace_mem_region *region;
+ sparsebit_idx_t pg;
+
+ TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
+ "not divisible by page size.\n"
+ " paddr_min: 0x%lx page_size: 0x%x",
+ paddr_min, vm->page_size);
+
+ /* Locate memory region. */
+ region = memslot2region(vm, memslot);
+
+ /* Locate next available physical page at or above paddr_min. */
+ pg = paddr_min >> vm->page_shift;
+
+ if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
+ pg = sparsebit_next_set(region->unused_phy_pages, pg);
+ if (pg == 0) {
+ fprintf(stderr, "No guest physical page available, "
+ "paddr_min: 0x%lx page_size: 0x%x memslot: %u",
+ paddr_min, vm->page_size, memslot);
+ fputs("---- vm dump ----\n", stderr);
+ vm_dump(stderr, vm, 2);
+ abort();
+ }
+ }
+
+ /* Specify page as in use and return its address. */
+ sparsebit_clear(region->unused_phy_pages, pg);
+
+ return pg * vm->page_size;
+}
+
+/* Address Guest Virtual to Host Virtual
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent host virtual address
+ */
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
new file mode 100644
index 000000000000..a0bd1980c81c
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -0,0 +1,67 @@
+/*
+ * tools/testing/selftests/kvm/lib/kvm_util.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#ifndef KVM_UTIL_INTERNAL_H
+#define KVM_UTIL_INTERNAL_H 1
+
+#include "sparsebit.h"
+
+#ifndef BITS_PER_BYTE
+#define BITS_PER_BYTE 8
+#endif
+
+#ifndef BITS_PER_LONG
+#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
+#endif
+
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG)
+
+/* Concrete definition of struct kvm_vm. */
+struct userspace_mem_region {
+ struct userspace_mem_region *next, *prev;
+ struct kvm_userspace_memory_region region;
+ struct sparsebit *unused_phy_pages;
+ int fd;
+ off_t offset;
+ void *host_mem;
+ void *mmap_start;
+ size_t mmap_size;
+};
+
+struct vcpu {
+ struct vcpu *next, *prev;
+ uint32_t id;
+ int fd;
+ struct kvm_run *state;
+};
+
+struct kvm_vm {
+ int mode;
+ int fd;
+ unsigned int page_size;
+ unsigned int page_shift;
+ uint64_t max_gfn;
+ struct vcpu *vcpu_head;
+ struct userspace_mem_region *userspace_mem_region_head;
+ struct sparsebit *vpages_valid;
+ struct sparsebit *vpages_mapped;
+ bool pgd_created;
+ vm_paddr_t pgd;
+};
+
+struct vcpu *vcpu_find(struct kvm_vm *vm,
+ uint32_t vcpuid);
+void vcpu_setup(struct kvm_vm *vm, int vcpuid);
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+void regs_dump(FILE *stream, struct kvm_regs *regs,
+ uint8_t indent);
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
+ uint8_t indent);
+
+#endif
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
new file mode 100644
index 000000000000..b132bc95d183
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -0,0 +1,2087 @@
+/*
+ * Sparse bit array
+ *
+ * Copyright (C) 2018, Google LLC.
+ * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * This library provides functions to support a memory efficient bit array,
+ * with an index size of 2^64. A sparsebit array is allocated through
+ * the use sparsebit_alloc() and free'd via sparsebit_free(),
+ * such as in the following:
+ *
+ * struct sparsebit *s;
+ * s = sparsebit_alloc();
+ * sparsebit_free(&s);
+ *
+ * The struct sparsebit type resolves down to a struct sparsebit.
+ * Note that, sparsebit_free() takes a pointer to the sparsebit
+ * structure. This is so that sparsebit_free() is able to poison
+ * the pointer (e.g. set it to NULL) to the struct sparsebit before
+ * returning to the caller.
+ *
+ * Between the return of sparsebit_alloc() and the call of
+ * sparsebit_free(), there are multiple query and modifying operations
+ * that can be performed on the allocated sparsebit array. All of
+ * these operations take as a parameter the value returned from
+ * sparsebit_alloc() and most also take a bit index. Frequently
+ * used routines include:
+ *
+ * ---- Query Operations
+ * sparsebit_is_set(s, idx)
+ * sparsebit_is_clear(s, idx)
+ * sparsebit_any_set(s)
+ * sparsebit_first_set(s)
+ * sparsebit_next_set(s, prev_idx)
+ *
+ * ---- Modifying Operations
+ * sparsebit_set(s, idx)
+ * sparsebit_clear(s, idx)
+ * sparsebit_set_num(s, idx, num);
+ * sparsebit_clear_num(s, idx, num);
+ *
+ * A common operation, is to itterate over all the bits set in a test
+ * sparsebit array. This can be done via code with the following structure:
+ *
+ * sparsebit_idx_t idx;
+ * if (sparsebit_any_set(s)) {
+ * idx = sparsebit_first_set(s);
+ * do {
+ * ...
+ * idx = sparsebit_next_set(s, idx);
+ * } while (idx != 0);
+ * }
+ *
+ * The index of the first bit set needs to be obtained via
+ * sparsebit_first_set(), because sparsebit_next_set(), needs
+ * the index of the previously set. The sparsebit_idx_t type is
+ * unsigned, so there is no previous index before 0 that is available.
+ * Also, the call to sparsebit_first_set() is not made unless there
+ * is at least 1 bit in the array set. This is because sparsebit_first_set()
+ * aborts if sparsebit_first_set() is called with no bits set.
+ * It is the callers responsibility to assure that the
+ * sparsebit array has at least a single bit set before calling
+ * sparsebit_first_set().
+ *
+ * ==== Implementation Overview ====
+ * For the most part the internal implementation of sparsebit is
+ * opaque to the caller. One important implementation detail that the
+ * caller may need to be aware of is the spatial complexity of the
+ * implementation. This implementation of a sparsebit array is not
+ * only sparse, in that it uses memory proportional to the number of bits
+ * set. It is also efficient in memory usage when most of the bits are
+ * set.
+ *
+ * At a high-level the state of the bit settings are maintained through
+ * the use of a binary-search tree, where each node contains at least
+ * the following members:
+ *
+ * typedef uint64_t sparsebit_idx_t;
+ * typedef uint64_t sparsebit_num_t;
+ *
+ * sparsebit_idx_t idx;
+ * uint32_t mask;
+ * sparsebit_num_t num_after;
+ *
+ * The idx member contains the bit index of the first bit described by this
+ * node, while the mask member stores the setting of the first 32-bits.
+ * The setting of the bit at idx + n, where 0 <= n < 32, is located in the
+ * mask member at 1 << n.
+ *
+ * Nodes are sorted by idx and the bits described by two nodes will never
+ * overlap. The idx member is always aligned to the mask size, i.e. a
+ * multiple of 32.
+ *
+ * Beyond a typical implementation, the nodes in this implementation also
+ * contains a member named num_after. The num_after member holds the
+ * number of bits immediately after the mask bits that are contiguously set.
+ * The use of the num_after member allows this implementation to efficiently
+ * represent cases where most bits are set. For example, the case of all
+ * but the last two bits set, is represented by the following two nodes:
+ *
+ * node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0
+ * node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0
+ *
+ * ==== Invariants ====
+ * This implementation usses the following invariants:
+ *
+ * + Node are only used to represent bits that are set.
+ * Nodes with a mask of 0 and num_after of 0 are not allowed.
+ *
+ * + Sum of bits set in all the nodes is equal to the value of
+ * the struct sparsebit_pvt num_set member.
+ *
+ * + The setting of at least one bit is always described in a nodes
+ * mask (mask >= 1).
+ *
+ * + A node with all mask bits set only occurs when the last bit
+ * described by the previous node is not equal to this nodes
+ * starting index - 1. All such occurences of this condition are
+ * avoided by moving the setting of the nodes mask bits into
+ * the previous nodes num_after setting.
+ *
+ * + Node starting index is evenly divisible by the number of bits
+ * within a nodes mask member.
+ *
+ * + Nodes never represent a range of bits that wrap around the
+ * highest supported index.
+ *
+ * (idx + MASK_BITS + num_after - 1) <= ((sparsebit_idx_t) 0) - 1)
+ *
+ * As a consequence of the above, the num_after member of a node
+ * will always be <=:
+ *
+ * maximum_index - nodes_starting_index - number_of_mask_bits
+ *
+ * + Nodes within the binary search tree are sorted based on each
+ * nodes starting index.
+ *
+ * + The range of bits described by any two nodes do not overlap. The
+ * range of bits described by a single node is:
+ *
+ * start: node->idx
+ * end (inclusive): node->idx + MASK_BITS + node->num_after - 1;
+ *
+ * Note, at times these invariants are temporarily violated for a
+ * specific portion of the code. For example, when setting a mask
+ * bit, there is a small delay between when the mask bit is set and the
+ * value in the struct sparsebit_pvt num_set member is updated. Other
+ * temporary violations occur when node_split() is called with a specified
+ * index and assures that a node where its mask represents the bit
+ * at the specified index exists. At times to do this node_split()
+ * must split an existing node into two nodes or create a node that
+ * has no bits set. Such temporary violations must be corrected before
+ * returning to the caller. These corrections are typically performed
+ * by the local function node_reduce().
+ */
+
+#include "test_util.h"
+#include "sparsebit.h"
+#include <limits.h>
+#include <assert.h>
+
+#define DUMP_LINE_MAX 100 /* Does not include indent amount */
+
+typedef uint32_t mask_t;
+#define MASK_BITS (sizeof(mask_t) * CHAR_BIT)
+
+struct node {
+ struct node *parent;
+ struct node *left;
+ struct node *right;
+ sparsebit_idx_t idx; /* index of least-significant bit in mask */
+ sparsebit_num_t num_after; /* num contiguously set after mask */
+ mask_t mask;
+};
+
+struct sparsebit {
+ /*
+ * Points to root node of the binary search
+ * tree. Equal to NULL when no bits are set in
+ * the entire sparsebit array.
+ */
+ struct node *root;
+
+ /*
+ * A redundant count of the total number of bits set. Used for
+ * diagnostic purposes and to change the time complexity of
+ * sparsebit_num_set() from O(n) to O(1).
+ * Note: Due to overflow, a value of 0 means none or all set.
+ */
+ sparsebit_num_t num_set;
+};
+
+/* Returns the number of set bits described by the settings
+ * of the node pointed to by nodep.
+ */
+static sparsebit_num_t node_num_set(struct node *nodep)
+{
+ return nodep->num_after + __builtin_popcount(nodep->mask);
+}
+
+/* Returns a pointer to the node that describes the
+ * lowest bit index.
+ */
+static struct node *node_first(struct sparsebit *s)
+{
+ struct node *nodep;
+
+ for (nodep = s->root; nodep && nodep->left; nodep = nodep->left)
+ ;
+
+ return nodep;
+}
+
+/* Returns a pointer to the node that describes the
+ * lowest bit index > the index of the node pointed to by np.
+ * Returns NULL if no node with a higher index exists.
+ */
+static struct node *node_next(struct sparsebit *s, struct node *np)
+{
+ struct node *nodep = np;
+
+ /*
+ * If current node has a right child, next node is the left-most
+ * of the right child.
+ */
+ if (nodep->right) {
+ for (nodep = nodep->right; nodep->left; nodep = nodep->left)
+ ;
+ return nodep;
+ }
+
+ /*
+ * No right child. Go up until node is left child of a parent.
+ * That parent is then the next node.
+ */
+ while (nodep->parent && nodep == nodep->parent->right)
+ nodep = nodep->parent;
+
+ return nodep->parent;
+}
+
+/* Searches for and returns a pointer to the node that describes the
+ * highest index < the index of the node pointed to by np.
+ * Returns NULL if no node with a lower index exists.
+ */
+static struct node *node_prev(struct sparsebit *s, struct node *np)
+{
+ struct node *nodep = np;
+
+ /*
+ * If current node has a left child, next node is the right-most
+ * of the left child.
+ */
+ if (nodep->left) {
+ for (nodep = nodep->left; nodep->right; nodep = nodep->right)
+ ;
+ return (struct node *) nodep;
+ }
+
+ /*
+ * No left child. Go up until node is right child of a parent.
+ * That parent is then the next node.
+ */
+ while (nodep->parent && nodep == nodep->parent->left)
+ nodep = nodep->parent;
+
+ return (struct node *) nodep->parent;
+}
+
+
+/* Allocates space to hold a copy of the node sub-tree pointed to by
+ * subtree and duplicates the bit settings to the newly allocated nodes.
+ * Returns the newly allocated copy of subtree.
+ */
+static struct node *node_copy_subtree(struct node *subtree)
+{
+ struct node *root;
+
+ /* Duplicate the node at the root of the subtree */
+ root = calloc(1, sizeof(*root));
+ if (!root) {
+ perror("calloc");
+ abort();
+ }
+
+ root->idx = subtree->idx;
+ root->mask = subtree->mask;
+ root->num_after = subtree->num_after;
+
+ /* As needed, recursively duplicate the left and right subtrees */
+ if (subtree->left) {
+ root->left = node_copy_subtree(subtree->left);
+ root->left->parent = root;
+ }
+
+ if (subtree->right) {
+ root->right = node_copy_subtree(subtree->right);
+ root->right->parent = root;
+ }
+
+ return root;
+}
+
+/* Searches for and returns a pointer to the node that describes the setting
+ * of the bit given by idx. A node describes the setting of a bit if its
+ * index is within the bits described by the mask bits or the number of
+ * contiguous bits set after the mask. Returns NULL if there is no such node.
+ */
+static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep;
+
+ /* Find the node that describes the setting of the bit at idx */
+ for (nodep = s->root; nodep;
+ nodep = nodep->idx > idx ? nodep->left : nodep->right) {
+ if (idx >= nodep->idx &&
+ idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
+ break;
+ }
+
+ return nodep;
+}
+
+/* Entry Requirements:
+ * + A node that describes the setting of idx is not already present.
+ *
+ * Adds a new node to describe the setting of the bit at the index given
+ * by idx. Returns a pointer to the newly added node.
+ *
+ * TODO(lhuemill): Degenerate cases causes the tree to get unbalanced.
+ */
+static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep, *parentp, *prev;
+
+ /* Allocate and initialize the new node. */
+ nodep = calloc(1, sizeof(*nodep));
+ if (!nodep) {
+ perror("calloc");
+ abort();
+ }
+
+ nodep->idx = idx & -MASK_BITS;
+
+ /* If no nodes, set it up as the root node. */
+ if (!s->root) {
+ s->root = nodep;
+ return nodep;
+ }
+
+ /*
+ * Find the parent where the new node should be attached
+ * and add the node there.
+ */
+ parentp = s->root;
+ while (true) {
+ if (idx < parentp->idx) {
+ if (!parentp->left) {
+ parentp->left = nodep;
+ nodep->parent = parentp;
+ break;
+ }
+ parentp = parentp->left;
+ } else {
+ assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1);
+ if (!parentp->right) {
+ parentp->right = nodep;
+ nodep->parent = parentp;
+ break;
+ }
+ parentp = parentp->right;
+ }
+ }
+
+ /*
+ * Does num_after bits of previous node overlap with the mask
+ * of the new node? If so set the bits in the new nodes mask
+ * and reduce the previous nodes num_after.
+ */
+ prev = node_prev(s, nodep);
+ while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) {
+ unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1)
+ - nodep->idx;
+ assert(prev->num_after > 0);
+ assert(n1 < MASK_BITS);
+ assert(!(nodep->mask & (1 << n1)));
+ nodep->mask |= (1 << n1);
+ prev->num_after--;
+ }
+
+ return nodep;
+}
+
+/* Returns whether all the bits in the sparsebit array are set. */
+bool sparsebit_all_set(struct sparsebit *s)
+{
+ /*
+ * If any nodes there must be at least one bit set. Only case
+ * where a bit is set and total num set is 0, is when all bits
+ * are set.
+ */
+ return s->root && s->num_set == 0;
+}
+
+/* Clears all bits described by the node pointed to by nodep, then
+ * removes the node.
+ */
+static void node_rm(struct sparsebit *s, struct node *nodep)
+{
+ struct node *tmp;
+ sparsebit_num_t num_set;
+
+ num_set = node_num_set(nodep);
+ assert(s->num_set >= num_set || sparsebit_all_set(s));
+ s->num_set -= node_num_set(nodep);
+
+ /* Have both left and right child */
+ if (nodep->left && nodep->right) {
+ /*
+ * Move left children to the leftmost leaf node
+ * of the right child.
+ */
+ for (tmp = nodep->right; tmp->left; tmp = tmp->left)
+ ;
+ tmp->left = nodep->left;
+ nodep->left = NULL;
+ tmp->left->parent = tmp;
+ }
+
+ /* Left only child */
+ if (nodep->left) {
+ if (!nodep->parent) {
+ s->root = nodep->left;
+ nodep->left->parent = NULL;
+ } else {
+ nodep->left->parent = nodep->parent;
+ if (nodep == nodep->parent->left)
+ nodep->parent->left = nodep->left;
+ else {
+ assert(nodep == nodep->parent->right);
+ nodep->parent->right = nodep->left;
+ }
+ }
+
+ nodep->parent = nodep->left = nodep->right = NULL;
+ free(nodep);
+
+ return;
+ }
+
+
+ /* Right only child */
+ if (nodep->right) {
+ if (!nodep->parent) {
+ s->root = nodep->right;
+ nodep->right->parent = NULL;
+ } else {
+ nodep->right->parent = nodep->parent;
+ if (nodep == nodep->parent->left)
+ nodep->parent->left = nodep->right;
+ else {
+ assert(nodep == nodep->parent->right);
+ nodep->parent->right = nodep->right;
+ }
+ }
+
+ nodep->parent = nodep->left = nodep->right = NULL;
+ free(nodep);
+
+ return;
+ }
+
+ /* Leaf Node */
+ if (!nodep->parent) {
+ s->root = NULL;
+ } else {
+ if (nodep->parent->left == nodep)
+ nodep->parent->left = NULL;
+ else {
+ assert(nodep == nodep->parent->right);
+ nodep->parent->right = NULL;
+ }
+ }
+
+ nodep->parent = nodep->left = nodep->right = NULL;
+ free(nodep);
+
+ return;
+}
+
+/* Splits the node containing the bit at idx so that there is a node
+ * that starts at the specified index. If no such node exists, a new
+ * node at the specified index is created. Returns the new node.
+ *
+ * idx must start of a mask boundary.
+ */
+static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep1, *nodep2;
+ sparsebit_idx_t offset;
+ sparsebit_num_t orig_num_after;
+
+ assert(!(idx % MASK_BITS));
+
+ /*
+ * Is there a node that describes the setting of idx?
+ * If not, add it.
+ */
+ nodep1 = node_find(s, idx);
+ if (!nodep1)
+ return node_add(s, idx);
+
+ /*
+ * All done if the starting index of the node is where the
+ * split should occur.
+ */
+ if (nodep1->idx == idx)
+ return nodep1;
+
+ /*
+ * Split point not at start of mask, so it must be part of
+ * bits described by num_after.
+ */
+
+ /*
+ * Calculate offset within num_after for where the split is
+ * to occur.
+ */
+ offset = idx - (nodep1->idx + MASK_BITS);
+ orig_num_after = nodep1->num_after;
+
+ /*
+ * Add a new node to describe the bits starting at
+ * the split point.
+ */
+ nodep1->num_after = offset;
+ nodep2 = node_add(s, idx);
+
+ /* Move bits after the split point into the new node */
+ nodep2->num_after = orig_num_after - offset;
+ if (nodep2->num_after >= MASK_BITS) {
+ nodep2->mask = ~(mask_t) 0;
+ nodep2->num_after -= MASK_BITS;
+ } else {
+ nodep2->mask = (1 << nodep2->num_after) - 1;
+ nodep2->num_after = 0;
+ }
+
+ return nodep2;
+}
+
+/* Iteratively reduces the node pointed to by nodep and its adjacent
+ * nodes into a more compact form. For example, a node with a mask with
+ * all bits set adjacent to a previous node, will get combined into a
+ * single node with an increased num_after setting.
+ *
+ * After each reduction, a further check is made to see if additional
+ * reductions are possible with the new previous and next nodes. Note,
+ * a search for a reduction is only done across the nodes nearest nodep
+ * and those that became part of a reduction. Reductions beyond nodep
+ * and the adjacent nodes that are reduced are not discovered. It is the
+ * responsibility of the caller to pass a nodep that is within one node
+ * of each possible reduction.
+ *
+ * This function does not fix the temporary violation of all invariants.
+ * For example it does not fix the case where the bit settings described
+ * by two or more nodes overlap. Such a violation introduces the potential
+ * complication of a bit setting for a specific index having different settings
+ * in different nodes. This would then introduce the further complication
+ * of which node has the correct setting of the bit and thus such conditions
+ * are not allowed.
+ *
+ * This function is designed to fix invariant violations that are introduced
+ * by node_split() and by changes to the nodes mask or num_after members.
+ * For example, when setting a bit within a nodes mask, the function that
+ * sets the bit doesn't have to worry about whether the setting of that
+ * bit caused the mask to have leading only or trailing only bits set.
+ * Instead, the function can call node_reduce(), with nodep equal to the
+ * node address that it set a mask bit in, and node_reduce() will notice
+ * the cases of leading or trailing only bits and that there is an
+ * adjacent node that the bit settings could be merged into.
+ *
+ * This implementation specifically detects and corrects violation of the
+ * following invariants:
+ *
+ * + Node are only used to represent bits that are set.
+ * Nodes with a mask of 0 and num_after of 0 are not allowed.
+ *
+ * + The setting of at least one bit is always described in a nodes
+ * mask (mask >= 1).
+ *
+ * + A node with all mask bits set only occurs when the last bit
+ * described by the previous node is not equal to this nodes
+ * starting index - 1. All such occurences of this condition are
+ * avoided by moving the setting of the nodes mask bits into
+ * the previous nodes num_after setting.
+ */
+static void node_reduce(struct sparsebit *s, struct node *nodep)
+{
+ bool reduction_performed;
+
+ do {
+ reduction_performed = false;
+ struct node *prev, *next, *tmp;
+
+ /* 1) Potential reductions within the current node. */
+
+ /* Nodes with all bits cleared may be removed. */
+ if (nodep->mask == 0 && nodep->num_after == 0) {
+ /*
+ * About to remove the node pointed to by
+ * nodep, which normally would cause a problem
+ * for the next pass through the reduction loop,
+ * because the node at the starting point no longer
+ * exists. This potential problem is handled
+ * by first remembering the location of the next
+ * or previous nodes. Doesn't matter which, because
+ * once the node at nodep is removed, there will be
+ * no other nodes between prev and next.
+ *
+ * Note, the checks performed on nodep against both
+ * both prev and next both check for an adjacent
+ * node that can be reduced into a single node. As
+ * such, after removing the node at nodep, doesn't
+ * matter whether the nodep for the next pass
+ * through the loop is equal to the previous pass
+ * prev or next node. Either way, on the next pass
+ * the one not selected will become either the
+ * prev or next node.
+ */
+ tmp = node_next(s, nodep);
+ if (!tmp)
+ tmp = node_prev(s, nodep);
+
+ node_rm(s, nodep);
+ nodep = NULL;
+
+ nodep = tmp;
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * When the mask is 0, can reduce the amount of num_after
+ * bits by moving the initial num_after bits into the mask.
+ */
+ if (nodep->mask == 0) {
+ assert(nodep->num_after != 0);
+ assert(nodep->idx + MASK_BITS > nodep->idx);
+
+ nodep->idx += MASK_BITS;
+
+ if (nodep->num_after >= MASK_BITS) {
+ nodep->mask = ~0;
+ nodep->num_after -= MASK_BITS;
+ } else {
+ nodep->mask = (1u << nodep->num_after) - 1;
+ nodep->num_after = 0;
+ }
+
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * 2) Potential reductions between the current and
+ * previous nodes.
+ */
+ prev = node_prev(s, nodep);
+ if (prev) {
+ sparsebit_idx_t prev_highest_bit;
+
+ /* Nodes with no bits set can be removed. */
+ if (prev->mask == 0 && prev->num_after == 0) {
+ node_rm(s, prev);
+
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * All mask bits set and previous node has
+ * adjacent index.
+ */
+ if (nodep->mask + 1 == 0 &&
+ prev->idx + MASK_BITS == nodep->idx) {
+ prev->num_after += MASK_BITS + nodep->num_after;
+ nodep->mask = 0;
+ nodep->num_after = 0;
+
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * Is node adjacent to previous node and the node
+ * contains a single contiguous range of bits
+ * starting from the beginning of the mask?
+ */
+ prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after;
+ if (prev_highest_bit + 1 == nodep->idx &&
+ (nodep->mask | (nodep->mask >> 1)) == nodep->mask) {
+ /*
+ * How many contiguous bits are there?
+ * Is equal to the total number of set
+ * bits, due to an earlier check that
+ * there is a single contiguous range of
+ * set bits.
+ */
+ unsigned int num_contiguous
+ = __builtin_popcount(nodep->mask);
+ assert((num_contiguous > 0) &&
+ ((1ULL << num_contiguous) - 1) == nodep->mask);
+
+ prev->num_after += num_contiguous;
+ nodep->mask = 0;
+
+ /*
+ * For predictable performance, handle special
+ * case where all mask bits are set and there
+ * is a non-zero num_after setting. This code
+ * is functionally correct without the following
+ * conditionalized statements, but without them
+ * the value of num_after is only reduced by
+ * the number of mask bits per pass. There are
+ * cases where num_after can be close to 2^64.
+ * Without this code it could take nearly
+ * (2^64) / 32 passes to perform the full
+ * reduction.
+ */
+ if (num_contiguous == MASK_BITS) {
+ prev->num_after += nodep->num_after;
+ nodep->num_after = 0;
+ }
+
+ reduction_performed = true;
+ continue;
+ }
+ }
+
+ /*
+ * 3) Potential reductions between the current and
+ * next nodes.
+ */
+ next = node_next(s, nodep);
+ if (next) {
+ /* Nodes with no bits set can be removed. */
+ if (next->mask == 0 && next->num_after == 0) {
+ node_rm(s, next);
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * Is next node index adjacent to current node
+ * and has a mask with all bits set?
+ */
+ if (next->idx == nodep->idx + MASK_BITS + nodep->num_after &&
+ next->mask == ~(mask_t) 0) {
+ nodep->num_after += MASK_BITS;
+ next->mask = 0;
+ nodep->num_after += next->num_after;
+ next->num_after = 0;
+
+ node_rm(s, next);
+ next = NULL;
+
+ reduction_performed = true;
+ continue;
+ }
+ }
+ } while (nodep && reduction_performed);
+}
+
+/* Returns whether the bit at the index given by idx, within the
+ * sparsebit array is set or not.
+ */
+bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep;
+
+ /* Find the node that describes the setting of the bit at idx */
+ for (nodep = s->root; nodep;
+ nodep = nodep->idx > idx ? nodep->left : nodep->right)
+ if (idx >= nodep->idx &&
+ idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
+ goto have_node;
+
+ return false;
+
+have_node:
+ /* Bit is set if it is any of the bits described by num_after */
+ if (nodep->num_after && idx >= nodep->idx + MASK_BITS)
+ return true;
+
+ /* Is the corresponding mask bit set */
+ assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS);
+ return !!(nodep->mask & (1 << (idx - nodep->idx)));
+}
+
+/* Within the sparsebit array pointed to by s, sets the bit
+ * at the index given by idx.
+ */
+static void bit_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep;
+
+ /* Skip bits that are already set */
+ if (sparsebit_is_set(s, idx))
+ return;
+
+ /*
+ * Get a node where the bit at idx is described by the mask.
+ * The node_split will also create a node, if there isn't
+ * already a node that describes the setting of bit.
+ */
+ nodep = node_split(s, idx & -MASK_BITS);
+
+ /* Set the bit within the nodes mask */
+ assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
+ assert(!(nodep->mask & (1 << (idx - nodep->idx))));
+ nodep->mask |= 1 << (idx - nodep->idx);
+ s->num_set++;
+
+ node_reduce(s, nodep);
+}
+
+/* Within the sparsebit array pointed to by s, clears the bit
+ * at the index given by idx.
+ */
+static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep;
+
+ /* Skip bits that are already cleared */
+ if (!sparsebit_is_set(s, idx))
+ return;
+
+ /* Is there a node that describes the setting of this bit? */
+ nodep = node_find(s, idx);
+ if (!nodep)
+ return;
+
+ /*
+ * If a num_after bit, split the node, so that the bit is
+ * part of a node mask.
+ */
+ if (idx >= nodep->idx + MASK_BITS)
+ nodep = node_split(s, idx & -MASK_BITS);
+
+ /*
+ * After node_split above, bit at idx should be within the mask.
+ * Clear that bit.
+ */
+ assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
+ assert(nodep->mask & (1 << (idx - nodep->idx)));
+ nodep->mask &= ~(1 << (idx - nodep->idx));
+ assert(s->num_set > 0 || sparsebit_all_set(s));
+ s->num_set--;
+
+ node_reduce(s, nodep);
+}
+
+/* Recursively dumps to the FILE stream given by stream the contents
+ * of the sub-tree of nodes pointed to by nodep. Each line of output
+ * is prefixed by the number of spaces given by indent. On each
+ * recursion, the indent amount is increased by 2. This causes nodes
+ * at each level deeper into the binary search tree to be displayed
+ * with a greater indent.
+ */
+static void dump_nodes(FILE *stream, struct node *nodep,
+ unsigned int indent)
+{
+ char *node_type;
+
+ /* Dump contents of node */
+ if (!nodep->parent)
+ node_type = "root";
+ else if (nodep == nodep->parent->left)
+ node_type = "left";
+ else {
+ assert(nodep == nodep->parent->right);
+ node_type = "right";
+ }
+ fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep);
+ fprintf(stream, "%*s parent: %p left: %p right: %p\n", indent, "",
+ nodep->parent, nodep->left, nodep->right);
+ fprintf(stream, "%*s idx: 0x%lx mask: 0x%x num_after: 0x%lx\n",
+ indent, "", nodep->idx, nodep->mask, nodep->num_after);
+
+ /* If present, dump contents of left child nodes */
+ if (nodep->left)
+ dump_nodes(stream, nodep->left, indent + 2);
+
+ /* If present, dump contents of right child nodes */
+ if (nodep->right)
+ dump_nodes(stream, nodep->right, indent + 2);
+}
+
+static inline sparsebit_idx_t node_first_set(struct node *nodep, int start)
+{
+ mask_t leading = (mask_t)1 << start;
+ int n1 = __builtin_ctz(nodep->mask & -leading);
+
+ return nodep->idx + n1;
+}
+
+static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
+{
+ mask_t leading = (mask_t)1 << start;
+ int n1 = __builtin_ctz(~nodep->mask & -leading);
+
+ return nodep->idx + n1;
+}
+
+/* Dumps to the FILE stream specified by stream, the implementation dependent
+ * internal state of s. Each line of output is prefixed with the number
+ * of spaces given by indent. The output is completely implementation
+ * dependent and subject to change. Output from this function should only
+ * be used for diagnostic purposes. For example, this function can be
+ * used by test cases after they detect an unexpected condition, as a means
+ * to capture diagnostic information.
+ */
+static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
+ unsigned int indent)
+{
+ /* Dump the contents of s */
+ fprintf(stream, "%*sroot: %p\n", indent, "", s->root);
+ fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set);
+
+ if (s->root)
+ dump_nodes(stream, s->root, indent);
+}
+
+/* Allocates and returns a new sparsebit array. The initial state
+ * of the newly allocated sparsebit array has all bits cleared.
+ */
+struct sparsebit *sparsebit_alloc(void)
+{
+ struct sparsebit *s;
+
+ /* Allocate top level structure. */
+ s = calloc(1, sizeof(*s));
+ if (!s) {
+ perror("calloc");
+ abort();
+ }
+
+ return s;
+}
+
+/* Frees the implementation dependent data for the sparsebit array
+ * pointed to by s and poisons the pointer to that data.
+ */
+void sparsebit_free(struct sparsebit **sbitp)
+{
+ struct sparsebit *s = *sbitp;
+
+ if (!s)
+ return;
+
+ sparsebit_clear_all(s);
+ free(s);
+ *sbitp = NULL;
+}
+
+/* Makes a copy of the sparsebit array given by s, to the sparsebit
+ * array given by d. Note, d must have already been allocated via
+ * sparsebit_alloc(). It can though already have bits set, which
+ * if different from src will be cleared.
+ */
+void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
+{
+ /* First clear any bits already set in the destination */
+ sparsebit_clear_all(d);
+
+ if (s->root) {
+ d->root = node_copy_subtree(s->root);
+ d->num_set = s->num_set;
+ }
+}
+
+/* Returns whether num consecutive bits starting at idx are all set. */
+bool sparsebit_is_set_num(struct sparsebit *s,
+ sparsebit_idx_t idx, sparsebit_num_t num)
+{
+ sparsebit_idx_t next_cleared;
+
+ assert(num > 0);
+ assert(idx + num - 1 >= idx);
+
+ /* With num > 0, the first bit must be set. */
+ if (!sparsebit_is_set(s, idx))
+ return false;
+
+ /* Find the next cleared bit */
+ next_cleared = sparsebit_next_clear(s, idx);
+
+ /*
+ * If no cleared bits beyond idx, then there are at least num
+ * set bits. idx + num doesn't wrap. Otherwise check if
+ * there are enough set bits between idx and the next cleared bit.
+ */
+ return next_cleared == 0 || next_cleared - idx >= num;
+}
+
+/* Returns whether the bit at the index given by idx. */
+bool sparsebit_is_clear(struct sparsebit *s,
+ sparsebit_idx_t idx)
+{
+ return !sparsebit_is_set(s, idx);
+}
+
+/* Returns whether num consecutive bits starting at idx are all cleared. */
+bool sparsebit_is_clear_num(struct sparsebit *s,
+ sparsebit_idx_t idx, sparsebit_num_t num)
+{
+ sparsebit_idx_t next_set;
+
+ assert(num > 0);
+ assert(idx + num - 1 >= idx);
+
+ /* With num > 0, the first bit must be cleared. */
+ if (!sparsebit_is_clear(s, idx))
+ return false;
+
+ /* Find the next set bit */
+ next_set = sparsebit_next_set(s, idx);
+
+ /*
+ * If no set bits beyond idx, then there are at least num
+ * cleared bits. idx + num doesn't wrap. Otherwise check if
+ * there are enough cleared bits between idx and the next set bit.
+ */
+ return next_set == 0 || next_set - idx >= num;
+}
+
+/* Returns the total number of bits set. Note: 0 is also returned for
+ * the case of all bits set. This is because with all bits set, there
+ * is 1 additional bit set beyond what can be represented in the return
+ * value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0,
+ * to determine if the sparsebit array has any bits set.
+ */
+sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
+{
+ return s->num_set;
+}
+
+/* Returns whether any bit is set in the sparsebit array. */
+bool sparsebit_any_set(struct sparsebit *s)
+{
+ /*
+ * Nodes only describe set bits. If any nodes then there
+ * is at least 1 bit set.
+ */
+ if (!s->root)
+ return false;
+
+ /*
+ * Every node should have a non-zero mask. For now will
+ * just assure that the root node has a non-zero mask,
+ * which is a quick check that at least 1 bit is set.
+ */
+ assert(s->root->mask != 0);
+ assert(s->num_set > 0 ||
+ (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS &&
+ s->root->mask == ~(mask_t) 0));
+
+ return true;
+}
+
+/* Returns whether all the bits in the sparsebit array are cleared. */
+bool sparsebit_all_clear(struct sparsebit *s)
+{
+ return !sparsebit_any_set(s);
+}
+
+/* Returns whether all the bits in the sparsebit array are set. */
+bool sparsebit_any_clear(struct sparsebit *s)
+{
+ return !sparsebit_all_set(s);
+}
+
+/* Returns the index of the first set bit. Abort if no bits are set.
+ */
+sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
+{
+ struct node *nodep;
+
+ /* Validate at least 1 bit is set */
+ assert(sparsebit_any_set(s));
+
+ nodep = node_first(s);
+ return node_first_set(nodep, 0);
+}
+
+/* Returns the index of the first cleared bit. Abort if
+ * no bits are cleared.
+ */
+sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
+{
+ struct node *nodep1, *nodep2;
+
+ /* Validate at least 1 bit is cleared. */
+ assert(sparsebit_any_clear(s));
+
+ /* If no nodes or first node index > 0 then lowest cleared is 0 */
+ nodep1 = node_first(s);
+ if (!nodep1 || nodep1->idx > 0)
+ return 0;
+
+ /* Does the mask in the first node contain any cleared bits. */
+ if (nodep1->mask != ~(mask_t) 0)
+ return node_first_clear(nodep1, 0);
+
+ /*
+ * All mask bits set in first node. If there isn't a second node
+ * then the first cleared bit is the first bit after the bits
+ * described by the first node.
+ */
+ nodep2 = node_next(s, nodep1);
+ if (!nodep2) {
+ /*
+ * No second node. First cleared bit is first bit beyond
+ * bits described by first node.
+ */
+ assert(nodep1->mask == ~(mask_t) 0);
+ assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0);
+ return nodep1->idx + MASK_BITS + nodep1->num_after;
+ }
+
+ /*
+ * There is a second node.
+ * If it is not adjacent to the first node, then there is a gap
+ * of cleared bits between the nodes, and the first cleared bit
+ * is the first bit within the gap.
+ */
+ if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
+ return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+ /*
+ * Second node is adjacent to the first node.
+ * Because it is adjacent, its mask should be non-zero. If all
+ * its mask bits are set, then with it being adjacent, it should
+ * have had the mask bits moved into the num_after setting of the
+ * previous node.
+ */
+ return node_first_clear(nodep2, 0);
+}
+
+/* Returns index of next bit set within s after the index given by prev.
+ * Returns 0 if there are no bits after prev that are set.
+ */
+sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
+ sparsebit_idx_t prev)
+{
+ sparsebit_idx_t lowest_possible = prev + 1;
+ sparsebit_idx_t start;
+ struct node *nodep;
+
+ /* A bit after the highest index can't be set. */
+ if (lowest_possible == 0)
+ return 0;
+
+ /*
+ * Find the leftmost 'candidate' overlapping or to the right
+ * of lowest_possible.
+ */
+ struct node *candidate = NULL;
+
+ /* True iff lowest_possible is within candidate */
+ bool contains = false;
+
+ /*
+ * Find node that describes setting of bit at lowest_possible.
+ * If such a node doesn't exist, find the node with the lowest
+ * starting index that is > lowest_possible.
+ */
+ for (nodep = s->root; nodep;) {
+ if ((nodep->idx + MASK_BITS + nodep->num_after - 1)
+ >= lowest_possible) {
+ candidate = nodep;
+ if (candidate->idx <= lowest_possible) {
+ contains = true;
+ break;
+ }
+ nodep = nodep->left;
+ } else {
+ nodep = nodep->right;
+ }
+ }
+ if (!candidate)
+ return 0;
+
+ assert(candidate->mask != 0);
+
+ /* Does the candidate node describe the setting of lowest_possible? */
+ if (!contains) {
+ /*
+ * Candidate doesn't describe setting of bit at lowest_possible.
+ * Candidate points to the first node with a starting index
+ * > lowest_possible.
+ */
+ assert(candidate->idx > lowest_possible);
+
+ return node_first_set(candidate, 0);
+ }
+
+ /*
+ * Candidate describes setting of bit at lowest_possible.
+ * Note: although the node describes the setting of the bit
+ * at lowest_possible, its possible that its setting and the
+ * setting of all latter bits described by this node are 0.
+ * For now, just handle the cases where this node describes
+ * a bit at or after an index of lowest_possible that is set.
+ */
+ start = lowest_possible - candidate->idx;
+
+ if (start < MASK_BITS && candidate->mask >= (1 << start))
+ return node_first_set(candidate, start);
+
+ if (candidate->num_after) {
+ sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS;
+
+ return lowest_possible < first_num_after_idx
+ ? first_num_after_idx : lowest_possible;
+ }
+
+ /*
+ * Although candidate node describes setting of bit at
+ * the index of lowest_possible, all bits at that index and
+ * latter that are described by candidate are cleared. With
+ * this, the next bit is the first bit in the next node, if
+ * such a node exists. If a next node doesn't exist, then
+ * there is no next set bit.
+ */
+ candidate = node_next(s, candidate);
+ if (!candidate)
+ return 0;
+
+ return node_first_set(candidate, 0);
+}
+
+/* Returns index of next bit cleared within s after the index given by prev.
+ * Returns 0 if there are no bits after prev that are cleared.
+ */
+sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
+ sparsebit_idx_t prev)
+{
+ sparsebit_idx_t lowest_possible = prev + 1;
+ sparsebit_idx_t idx;
+ struct node *nodep1, *nodep2;
+
+ /* A bit after the highest index can't be set. */
+ if (lowest_possible == 0)
+ return 0;
+
+ /*
+ * Does a node describing the setting of lowest_possible exist?
+ * If not, the bit at lowest_possible is cleared.
+ */
+ nodep1 = node_find(s, lowest_possible);
+ if (!nodep1)
+ return lowest_possible;
+
+ /* Does a mask bit in node 1 describe the next cleared bit. */
+ for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++)
+ if (!(nodep1->mask & (1 << idx)))
+ return nodep1->idx + idx;
+
+ /*
+ * Next cleared bit is not described by node 1. If there
+ * isn't a next node, then next cleared bit is described
+ * by bit after the bits described by the first node.
+ */
+ nodep2 = node_next(s, nodep1);
+ if (!nodep2)
+ return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+ /*
+ * There is a second node.
+ * If it is not adjacent to the first node, then there is a gap
+ * of cleared bits between the nodes, and the next cleared bit
+ * is the first bit within the gap.
+ */
+ if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
+ return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+ /*
+ * Second node is adjacent to the first node.
+ * Because it is adjacent, its mask should be non-zero. If all
+ * its mask bits are set, then with it being adjacent, it should
+ * have had the mask bits moved into the num_after setting of the
+ * previous node.
+ */
+ return node_first_clear(nodep2, 0);
+}
+
+/* Starting with the index 1 greater than the index given by start, finds
+ * and returns the index of the first sequence of num consecutively set
+ * bits. Returns a value of 0 of no such sequence exists.
+ */
+sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
+ sparsebit_idx_t start, sparsebit_num_t num)
+{
+ sparsebit_idx_t idx;
+
+ assert(num >= 1);
+
+ for (idx = sparsebit_next_set(s, start);
+ idx != 0 && idx + num - 1 >= idx;
+ idx = sparsebit_next_set(s, idx)) {
+ assert(sparsebit_is_set(s, idx));
+
+ /*
+ * Does the sequence of bits starting at idx consist of
+ * num set bits?
+ */
+ if (sparsebit_is_set_num(s, idx, num))
+ return idx;
+
+ /*
+ * Sequence of set bits at idx isn't large enough.
+ * Skip this entire sequence of set bits.
+ */
+ idx = sparsebit_next_clear(s, idx);
+ if (idx == 0)
+ return 0;
+ }
+
+ return 0;
+}
+
+/* Starting with the index 1 greater than the index given by start, finds
+ * and returns the index of the first sequence of num consecutively cleared
+ * bits. Returns a value of 0 of no such sequence exists.
+ */
+sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
+ sparsebit_idx_t start, sparsebit_num_t num)
+{
+ sparsebit_idx_t idx;
+
+ assert(num >= 1);
+
+ for (idx = sparsebit_next_clear(s, start);
+ idx != 0 && idx + num - 1 >= idx;
+ idx = sparsebit_next_clear(s, idx)) {
+ assert(sparsebit_is_clear(s, idx));
+
+ /*
+ * Does the sequence of bits starting at idx consist of
+ * num cleared bits?
+ */
+ if (sparsebit_is_clear_num(s, idx, num))
+ return idx;
+
+ /*
+ * Sequence of cleared bits at idx isn't large enough.
+ * Skip this entire sequence of cleared bits.
+ */
+ idx = sparsebit_next_set(s, idx);
+ if (idx == 0)
+ return 0;
+ }
+
+ return 0;
+}
+
+/* Sets the bits * in the inclusive range idx through idx + num - 1. */
+void sparsebit_set_num(struct sparsebit *s,
+ sparsebit_idx_t start, sparsebit_num_t num)
+{
+ struct node *nodep, *next;
+ unsigned int n1;
+ sparsebit_idx_t idx;
+ sparsebit_num_t n;
+ sparsebit_idx_t middle_start, middle_end;
+
+ assert(num > 0);
+ assert(start + num - 1 >= start);
+
+ /*
+ * Leading - bits before first mask boundary.
+ *
+ * TODO(lhuemill): With some effort it may be possible to
+ * replace the following loop with a sequential sequence
+ * of statements. High level sequence would be:
+ *
+ * 1. Use node_split() to force node that describes setting
+ * of idx to be within the mask portion of a node.
+ * 2. Form mask of bits to be set.
+ * 3. Determine number of mask bits already set in the node
+ * and store in a local variable named num_already_set.
+ * 4. Set the appropriate mask bits within the node.
+ * 5. Increment struct sparsebit_pvt num_set member
+ * by the number of bits that were actually set.
+ * Exclude from the counts bits that were already set.
+ * 6. Before returning to the caller, use node_reduce() to
+ * handle the multiple corner cases that this method
+ * introduces.
+ */
+ for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
+ bit_set(s, idx);
+
+ /* Middle - bits spanning one or more entire mask */
+ middle_start = idx;
+ middle_end = middle_start + (n & -MASK_BITS) - 1;
+ if (n >= MASK_BITS) {
+ nodep = node_split(s, middle_start);
+
+ /*
+ * As needed, split just after end of middle bits.
+ * No split needed if end of middle bits is at highest
+ * supported bit index.
+ */
+ if (middle_end + 1 > middle_end)
+ (void) node_split(s, middle_end + 1);
+
+ /* Delete nodes that only describe bits within the middle. */
+ for (next = node_next(s, nodep);
+ next && (next->idx < middle_end);
+ next = node_next(s, nodep)) {
+ assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
+ node_rm(s, next);
+ next = NULL;
+ }
+
+ /* As needed set each of the mask bits */
+ for (n1 = 0; n1 < MASK_BITS; n1++) {
+ if (!(nodep->mask & (1 << n1))) {
+ nodep->mask |= 1 << n1;
+ s->num_set++;
+ }
+ }
+
+ s->num_set -= nodep->num_after;
+ nodep->num_after = middle_end - middle_start + 1 - MASK_BITS;
+ s->num_set += nodep->num_after;
+
+ node_reduce(s, nodep);
+ }
+ idx = middle_end + 1;
+ n -= middle_end - middle_start + 1;
+
+ /* Trailing - bits at and beyond last mask boundary */
+ assert(n < MASK_BITS);
+ for (; n > 0; idx++, n--)
+ bit_set(s, idx);
+}
+
+/* Clears the bits * in the inclusive range idx through idx + num - 1. */
+void sparsebit_clear_num(struct sparsebit *s,
+ sparsebit_idx_t start, sparsebit_num_t num)
+{
+ struct node *nodep, *next;
+ unsigned int n1;
+ sparsebit_idx_t idx;
+ sparsebit_num_t n;
+ sparsebit_idx_t middle_start, middle_end;
+
+ assert(num > 0);
+ assert(start + num - 1 >= start);
+
+ /* Leading - bits before first mask boundary */
+ for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
+ bit_clear(s, idx);
+
+ /* Middle - bits spanning one or more entire mask */
+ middle_start = idx;
+ middle_end = middle_start + (n & -MASK_BITS) - 1;
+ if (n >= MASK_BITS) {
+ nodep = node_split(s, middle_start);
+
+ /*
+ * As needed, split just after end of middle bits.
+ * No split needed if end of middle bits is at highest
+ * supported bit index.
+ */
+ if (middle_end + 1 > middle_end)
+ (void) node_split(s, middle_end + 1);
+
+ /* Delete nodes that only describe bits within the middle. */
+ for (next = node_next(s, nodep);
+ next && (next->idx < middle_end);
+ next = node_next(s, nodep)) {
+ assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
+ node_rm(s, next);
+ next = NULL;
+ }
+
+ /* As needed clear each of the mask bits */
+ for (n1 = 0; n1 < MASK_BITS; n1++) {
+ if (nodep->mask & (1 << n1)) {
+ nodep->mask &= ~(1 << n1);
+ s->num_set--;
+ }
+ }
+
+ /* Clear any bits described by num_after */
+ s->num_set -= nodep->num_after;
+ nodep->num_after = 0;
+
+ /*
+ * Delete the node that describes the beginning of
+ * the middle bits and perform any allowed reductions
+ * with the nodes prev or next of nodep.
+ */
+ node_reduce(s, nodep);
+ nodep = NULL;
+ }
+ idx = middle_end + 1;
+ n -= middle_end - middle_start + 1;
+
+ /* Trailing - bits at and beyond last mask boundary */
+ assert(n < MASK_BITS);
+ for (; n > 0; idx++, n--)
+ bit_clear(s, idx);
+}
+
+/* Sets the bit at the index given by idx. */
+void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ sparsebit_set_num(s, idx, 1);
+}
+
+/* Clears the bit at the index given by idx. */
+void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ sparsebit_clear_num(s, idx, 1);
+}
+
+/* Sets the bits in the entire addressable range of the sparsebit array. */
+void sparsebit_set_all(struct sparsebit *s)
+{
+ sparsebit_set(s, 0);
+ sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0);
+ assert(sparsebit_all_set(s));
+}
+
+/* Clears the bits in the entire addressable range of the sparsebit array. */
+void sparsebit_clear_all(struct sparsebit *s)
+{
+ sparsebit_clear(s, 0);
+ sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0);
+ assert(!sparsebit_any_set(s));
+}
+
+static size_t display_range(FILE *stream, sparsebit_idx_t low,
+ sparsebit_idx_t high, bool prepend_comma_space)
+{
+ char *fmt_str;
+ size_t sz;
+
+ /* Determine the printf format string */
+ if (low == high)
+ fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx";
+ else
+ fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx";
+
+ /*
+ * When stream is NULL, just determine the size of what would
+ * have been printed, else print the range.
+ */
+ if (!stream)
+ sz = snprintf(NULL, 0, fmt_str, low, high);
+ else
+ sz = fprintf(stream, fmt_str, low, high);
+
+ return sz;
+}
+
+
+/* Dumps to the FILE stream given by stream, the bit settings
+ * of s. Each line of output is prefixed with the number of
+ * spaces given by indent. The length of each line is implementation
+ * dependent and does not depend on the indent amount. The following
+ * is an example output of a sparsebit array that has bits:
+ *
+ * 0x5, 0x8, 0xa:0xe, 0x12
+ *
+ * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18
+ * are set. Note that a ':', instead of a '-' is used to specify a range of
+ * contiguous bits. This is done because '-' is used to specify command-line
+ * options, and sometimes ranges are specified as command-line arguments.
+ */
+void sparsebit_dump(FILE *stream, struct sparsebit *s,
+ unsigned int indent)
+{
+ size_t current_line_len = 0;
+ size_t sz;
+ struct node *nodep;
+
+ if (!sparsebit_any_set(s))
+ return;
+
+ /* Display initial indent */
+ fprintf(stream, "%*s", indent, "");
+
+ /* For each node */
+ for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) {
+ unsigned int n1;
+ sparsebit_idx_t low, high;
+
+ /* For each group of bits in the mask */
+ for (n1 = 0; n1 < MASK_BITS; n1++) {
+ if (nodep->mask & (1 << n1)) {
+ low = high = nodep->idx + n1;
+
+ for (; n1 < MASK_BITS; n1++) {
+ if (nodep->mask & (1 << n1))
+ high = nodep->idx + n1;
+ else
+ break;
+ }
+
+ if ((n1 == MASK_BITS) && nodep->num_after)
+ high += nodep->num_after;
+
+ /*
+ * How much room will it take to display
+ * this range.
+ */
+ sz = display_range(NULL, low, high,
+ current_line_len != 0);
+
+ /*
+ * If there is not enough room, display
+ * a newline plus the indent of the next
+ * line.
+ */
+ if (current_line_len + sz > DUMP_LINE_MAX) {
+ fputs("\n", stream);
+ fprintf(stream, "%*s", indent, "");
+ current_line_len = 0;
+ }
+
+ /* Display the range */
+ sz = display_range(stream, low, high,
+ current_line_len != 0);
+ current_line_len += sz;
+ }
+ }
+
+ /*
+ * If num_after and most significant-bit of mask is not
+ * set, then still need to display a range for the bits
+ * described by num_after.
+ */
+ if (!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) {
+ low = nodep->idx + MASK_BITS;
+ high = nodep->idx + MASK_BITS + nodep->num_after - 1;
+
+ /*
+ * How much room will it take to display
+ * this range.
+ */
+ sz = display_range(NULL, low, high,
+ current_line_len != 0);
+
+ /*
+ * If there is not enough room, display
+ * a newline plus the indent of the next
+ * line.
+ */
+ if (current_line_len + sz > DUMP_LINE_MAX) {
+ fputs("\n", stream);
+ fprintf(stream, "%*s", indent, "");
+ current_line_len = 0;
+ }
+
+ /* Display the range */
+ sz = display_range(stream, low, high,
+ current_line_len != 0);
+ current_line_len += sz;
+ }
+ }
+ fputs("\n", stream);
+}
+
+/* Validates the internal state of the sparsebit array given by
+ * s. On error, diagnostic information is printed to stderr and
+ * abort is called.
+ */
+void sparsebit_validate_internal(struct sparsebit *s)
+{
+ bool error_detected = false;
+ struct node *nodep, *prev = NULL;
+ sparsebit_num_t total_bits_set = 0;
+ unsigned int n1;
+
+ /* For each node */
+ for (nodep = node_first(s); nodep;
+ prev = nodep, nodep = node_next(s, nodep)) {
+
+ /*
+ * Increase total bits set by the number of bits set
+ * in this node.
+ */
+ for (n1 = 0; n1 < MASK_BITS; n1++)
+ if (nodep->mask & (1 << n1))
+ total_bits_set++;
+
+ total_bits_set += nodep->num_after;
+
+ /*
+ * Arbitrary choice as to whether a mask of 0 is allowed
+ * or not. For diagnostic purposes it is beneficial to
+ * have only one valid means to represent a set of bits.
+ * To support this an arbitrary choice has been made
+ * to not allow a mask of zero.
+ */
+ if (nodep->mask == 0) {
+ fprintf(stderr, "Node mask of zero, "
+ "nodep: %p nodep->mask: 0x%x",
+ nodep, nodep->mask);
+ error_detected = true;
+ break;
+ }
+
+ /*
+ * Validate num_after is not greater than the max index
+ * - the number of mask bits. The num_after member
+ * uses 0-based indexing and thus has no value that
+ * represents all bits set. This limitation is handled
+ * by requiring a non-zero mask. With a non-zero mask,
+ * MASK_BITS worth of bits are described by the mask,
+ * which makes the largest needed num_after equal to:
+ *
+ * (~(sparsebit_num_t) 0) - MASK_BITS + 1
+ */
+ if (nodep->num_after
+ > (~(sparsebit_num_t) 0) - MASK_BITS + 1) {
+ fprintf(stderr, "num_after too large, "
+ "nodep: %p nodep->num_after: 0x%lx",
+ nodep, nodep->num_after);
+ error_detected = true;
+ break;
+ }
+
+ /* Validate node index is divisible by the mask size */
+ if (nodep->idx % MASK_BITS) {
+ fprintf(stderr, "Node index not divisible by "
+ "mask size,\n"
+ " nodep: %p nodep->idx: 0x%lx "
+ "MASK_BITS: %lu\n",
+ nodep, nodep->idx, MASK_BITS);
+ error_detected = true;
+ break;
+ }
+
+ /*
+ * Validate bits described by node don't wrap beyond the
+ * highest supported index.
+ */
+ if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) {
+ fprintf(stderr, "Bits described by node wrap "
+ "beyond highest supported index,\n"
+ " nodep: %p nodep->idx: 0x%lx\n"
+ " MASK_BITS: %lu nodep->num_after: 0x%lx",
+ nodep, nodep->idx, MASK_BITS, nodep->num_after);
+ error_detected = true;
+ break;
+ }
+
+ /* Check parent pointers. */
+ if (nodep->left) {
+ if (nodep->left->parent != nodep) {
+ fprintf(stderr, "Left child parent pointer "
+ "doesn't point to this node,\n"
+ " nodep: %p nodep->left: %p "
+ "nodep->left->parent: %p",
+ nodep, nodep->left,
+ nodep->left->parent);
+ error_detected = true;
+ break;
+ }
+ }
+
+ if (nodep->right) {
+ if (nodep->right->parent != nodep) {
+ fprintf(stderr, "Right child parent pointer "
+ "doesn't point to this node,\n"
+ " nodep: %p nodep->right: %p "
+ "nodep->right->parent: %p",
+ nodep, nodep->right,
+ nodep->right->parent);
+ error_detected = true;
+ break;
+ }
+ }
+
+ if (!nodep->parent) {
+ if (s->root != nodep) {
+ fprintf(stderr, "Unexpected root node, "
+ "s->root: %p nodep: %p",
+ s->root, nodep);
+ error_detected = true;
+ break;
+ }
+ }
+
+ if (prev) {
+ /*
+ * Is index of previous node before index of
+ * current node?
+ */
+ if (prev->idx >= nodep->idx) {
+ fprintf(stderr, "Previous node index "
+ ">= current node index,\n"
+ " prev: %p prev->idx: 0x%lx\n"
+ " nodep: %p nodep->idx: 0x%lx",
+ prev, prev->idx, nodep, nodep->idx);
+ error_detected = true;
+ break;
+ }
+
+ /*
+ * Nodes occur in asscending order, based on each
+ * nodes starting index.
+ */
+ if ((prev->idx + MASK_BITS + prev->num_after - 1)
+ >= nodep->idx) {
+ fprintf(stderr, "Previous node bit range "
+ "overlap with current node bit range,\n"
+ " prev: %p prev->idx: 0x%lx "
+ "prev->num_after: 0x%lx\n"
+ " nodep: %p nodep->idx: 0x%lx "
+ "nodep->num_after: 0x%lx\n"
+ " MASK_BITS: %lu",
+ prev, prev->idx, prev->num_after,
+ nodep, nodep->idx, nodep->num_after,
+ MASK_BITS);
+ error_detected = true;
+ break;
+ }
+
+ /*
+ * When the node has all mask bits set, it shouldn't
+ * be adjacent to the last bit described by the
+ * previous node.
+ */
+ if (nodep->mask == ~(mask_t) 0 &&
+ prev->idx + MASK_BITS + prev->num_after == nodep->idx) {
+ fprintf(stderr, "Current node has mask with "
+ "all bits set and is adjacent to the "
+ "previous node,\n"
+ " prev: %p prev->idx: 0x%lx "
+ "prev->num_after: 0x%lx\n"
+ " nodep: %p nodep->idx: 0x%lx "
+ "nodep->num_after: 0x%lx\n"
+ " MASK_BITS: %lu",
+ prev, prev->idx, prev->num_after,
+ nodep, nodep->idx, nodep->num_after,
+ MASK_BITS);
+
+ error_detected = true;
+ break;
+ }
+ }
+ }
+
+ if (!error_detected) {
+ /*
+ * Is sum of bits set in each node equal to the count
+ * of total bits set.
+ */
+ if (s->num_set != total_bits_set) {
+ fprintf(stderr, "Number of bits set missmatch,\n"
+ " s->num_set: 0x%lx total_bits_set: 0x%lx",
+ s->num_set, total_bits_set);
+
+ error_detected = true;
+ }
+ }
+
+ if (error_detected) {
+ fputs(" dump_internal:\n", stderr);
+ sparsebit_dump_internal(stderr, s, 4);
+ abort();
+ }
+}
+
+
+#ifdef FUZZ
+/* A simple but effective fuzzing driver. Look for bugs with the help
+ * of some invariants and of a trivial representation of sparsebit.
+ * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let
+ * afl-fuzz do the magic. :)
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+
+struct range {
+ sparsebit_idx_t first, last;
+ bool set;
+};
+
+struct sparsebit *s;
+struct range ranges[1000];
+int num_ranges;
+
+static bool get_value(sparsebit_idx_t idx)
+{
+ int i;
+
+ for (i = num_ranges; --i >= 0; )
+ if (ranges[i].first <= idx && idx <= ranges[i].last)
+ return ranges[i].set;
+
+ return false;
+}
+
+static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last)
+{
+ sparsebit_num_t num;
+ sparsebit_idx_t next;
+
+ if (first < last) {
+ num = last - first + 1;
+ } else {
+ num = first - last + 1;
+ first = last;
+ last = first + num - 1;
+ }
+
+ switch (code) {
+ case 0:
+ sparsebit_set(s, first);
+ assert(sparsebit_is_set(s, first));
+ assert(!sparsebit_is_clear(s, first));
+ assert(sparsebit_any_set(s));
+ assert(!sparsebit_all_clear(s));
+ if (get_value(first))
+ return;
+ if (num_ranges == 1000)
+ exit(0);
+ ranges[num_ranges++] = (struct range)
+ { .first = first, .last = first, .set = true };
+ break;
+ case 1:
+ sparsebit_clear(s, first);
+ assert(!sparsebit_is_set(s, first));
+ assert(sparsebit_is_clear(s, first));
+ assert(sparsebit_any_clear(s));
+ assert(!sparsebit_all_set(s));
+ if (!get_value(first))
+ return;
+ if (num_ranges == 1000)
+ exit(0);
+ ranges[num_ranges++] = (struct range)
+ { .first = first, .last = first, .set = false };
+ break;
+ case 2:
+ assert(sparsebit_is_set(s, first) == get_value(first));
+ assert(sparsebit_is_clear(s, first) == !get_value(first));
+ break;
+ case 3:
+ if (sparsebit_any_set(s))
+ assert(get_value(sparsebit_first_set(s)));
+ if (sparsebit_any_clear(s))
+ assert(!get_value(sparsebit_first_clear(s)));
+ sparsebit_set_all(s);
+ assert(!sparsebit_any_clear(s));
+ assert(sparsebit_all_set(s));
+ num_ranges = 0;
+ ranges[num_ranges++] = (struct range)
+ { .first = 0, .last = ~(sparsebit_idx_t)0, .set = true };
+ break;
+ case 4:
+ if (sparsebit_any_set(s))
+ assert(get_value(sparsebit_first_set(s)));
+ if (sparsebit_any_clear(s))
+ assert(!get_value(sparsebit_first_clear(s)));
+ sparsebit_clear_all(s);
+ assert(!sparsebit_any_set(s));
+ assert(sparsebit_all_clear(s));
+ num_ranges = 0;
+ break;
+ case 5:
+ next = sparsebit_next_set(s, first);
+ assert(next == 0 || next > first);
+ assert(next == 0 || get_value(next));
+ break;
+ case 6:
+ next = sparsebit_next_clear(s, first);
+ assert(next == 0 || next > first);
+ assert(next == 0 || !get_value(next));
+ break;
+ case 7:
+ next = sparsebit_next_clear(s, first);
+ if (sparsebit_is_set_num(s, first, num)) {
+ assert(next == 0 || next > last);
+ if (first)
+ next = sparsebit_next_set(s, first - 1);
+ else if (sparsebit_any_set(s))
+ next = sparsebit_first_set(s);
+ else
+ return;
+ assert(next == first);
+ } else {
+ assert(sparsebit_is_clear(s, first) || next <= last);
+ }
+ break;
+ case 8:
+ next = sparsebit_next_set(s, first);
+ if (sparsebit_is_clear_num(s, first, num)) {
+ assert(next == 0 || next > last);
+ if (first)
+ next = sparsebit_next_clear(s, first - 1);
+ else if (sparsebit_any_clear(s))
+ next = sparsebit_first_clear(s);
+ else
+ return;
+ assert(next == first);
+ } else {
+ assert(sparsebit_is_set(s, first) || next <= last);
+ }
+ break;
+ case 9:
+ sparsebit_set_num(s, first, num);
+ assert(sparsebit_is_set_num(s, first, num));
+ assert(!sparsebit_is_clear_num(s, first, num));
+ assert(sparsebit_any_set(s));
+ assert(!sparsebit_all_clear(s));
+ if (num_ranges == 1000)
+ exit(0);
+ ranges[num_ranges++] = (struct range)
+ { .first = first, .last = last, .set = true };
+ break;
+ case 10:
+ sparsebit_clear_num(s, first, num);
+ assert(!sparsebit_is_set_num(s, first, num));
+ assert(sparsebit_is_clear_num(s, first, num));
+ assert(sparsebit_any_clear(s));
+ assert(!sparsebit_all_set(s));
+ if (num_ranges == 1000)
+ exit(0);
+ ranges[num_ranges++] = (struct range)
+ { .first = first, .last = last, .set = false };
+ break;
+ case 11:
+ sparsebit_validate_internal(s);
+ break;
+ default:
+ break;
+ }
+}
+
+unsigned char get8(void)
+{
+ int ch;
+
+ ch = getchar();
+ if (ch == EOF)
+ exit(0);
+ return ch;
+}
+
+uint64_t get64(void)
+{
+ uint64_t x;
+
+ x = get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ return (x << 8) | get8();
+}
+
+int main(void)
+{
+ s = sparsebit_alloc();
+ for (;;) {
+ uint8_t op = get8() & 0xf;
+ uint64_t first = get64();
+ uint64_t last = get64();
+
+ operate(op, first, last);
+ }
+}
+#endif
diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/vmx.c
new file mode 100644
index 000000000000..0231bc0aae7b
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/vmx.c
@@ -0,0 +1,243 @@
+/*
+ * tools/testing/selftests/kvm/lib/x86.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+#include "vmx.h"
+
+/* Create a default VM for VMX tests.
+ *
+ * Input Args:
+ * vcpuid - The id of the single VCPU to add to the VM.
+ * guest_code - The vCPU's entry point
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ */
+struct kvm_vm *
+vm_create_default_vmx(uint32_t vcpuid, vmx_guest_code_t guest_code)
+{
+ struct kvm_cpuid2 *cpuid;
+ struct kvm_vm *vm;
+ vm_vaddr_t vmxon_vaddr;
+ vm_paddr_t vmxon_paddr;
+ vm_vaddr_t vmcs_vaddr;
+ vm_paddr_t vmcs_paddr;
+
+ vm = vm_create_default(vcpuid, (void *) guest_code);
+
+ /* Enable nesting in CPUID */
+ vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
+
+ /* Setup of a region of guest memory for the vmxon region. */
+ vmxon_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0);
+ vmxon_paddr = addr_gva2gpa(vm, vmxon_vaddr);
+
+ /* Setup of a region of guest memory for a vmcs. */
+ vmcs_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0);
+ vmcs_paddr = addr_gva2gpa(vm, vmcs_vaddr);
+
+ vcpu_args_set(vm, vcpuid, 4, vmxon_vaddr, vmxon_paddr, vmcs_vaddr,
+ vmcs_paddr);
+
+ return vm;
+}
+
+void prepare_for_vmx_operation(void)
+{
+ uint64_t feature_control;
+ uint64_t required;
+ unsigned long cr0;
+ unsigned long cr4;
+
+ /*
+ * Ensure bits in CR0 and CR4 are valid in VMX operation:
+ * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx.
+ * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx.
+ */
+ __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory");
+ cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1);
+ cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0);
+ __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory");
+
+ __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory");
+ cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1);
+ cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0);
+ /* Enable VMX operation */
+ cr4 |= X86_CR4_VMXE;
+ __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory");
+
+ /*
+ * Configure IA32_FEATURE_CONTROL MSR to allow VMXON:
+ * Bit 0: Lock bit. If clear, VMXON causes a #GP.
+ * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON
+ * outside of SMX causes a #GP.
+ */
+ required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+ required |= FEATURE_CONTROL_LOCKED;
+ feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
+ if ((feature_control & required) != required)
+ wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required);
+}
+
+/*
+ * Initialize the control fields to the most basic settings possible.
+ */
+static inline void init_vmcs_control_fields(void)
+{
+ vmwrite(VIRTUAL_PROCESSOR_ID, 0);
+ vmwrite(POSTED_INTR_NV, 0);
+
+ vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PINBASED_CTLS));
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PROCBASED_CTLS));
+ vmwrite(EXCEPTION_BITMAP, 0);
+ vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
+ vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
+ vmwrite(CR3_TARGET_COUNT, 0);
+ vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) |
+ VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */
+ vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
+ vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+ vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) |
+ VM_ENTRY_IA32E_MODE); /* 64-bit guest */
+ vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
+ vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
+ vmwrite(TPR_THRESHOLD, 0);
+ vmwrite(SECONDARY_VM_EXEC_CONTROL, 0);
+
+ vmwrite(CR0_GUEST_HOST_MASK, 0);
+ vmwrite(CR4_GUEST_HOST_MASK, 0);
+ vmwrite(CR0_READ_SHADOW, get_cr0());
+ vmwrite(CR4_READ_SHADOW, get_cr4());
+}
+
+/*
+ * Initialize the host state fields based on the current host state, with
+ * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch
+ * or vmresume.
+ */
+static inline void init_vmcs_host_state(void)
+{
+ uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS);
+
+ vmwrite(HOST_ES_SELECTOR, get_es());
+ vmwrite(HOST_CS_SELECTOR, get_cs());
+ vmwrite(HOST_SS_SELECTOR, get_ss());
+ vmwrite(HOST_DS_SELECTOR, get_ds());
+ vmwrite(HOST_FS_SELECTOR, get_fs());
+ vmwrite(HOST_GS_SELECTOR, get_gs());
+ vmwrite(HOST_TR_SELECTOR, get_tr());
+
+ if (exit_controls & VM_EXIT_LOAD_IA32_PAT)
+ vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT));
+ if (exit_controls & VM_EXIT_LOAD_IA32_EFER)
+ vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER));
+ if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+ vmwrite(HOST_IA32_PERF_GLOBAL_CTRL,
+ rdmsr(MSR_CORE_PERF_GLOBAL_CTRL));
+
+ vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS));
+
+ vmwrite(HOST_CR0, get_cr0());
+ vmwrite(HOST_CR3, get_cr3());
+ vmwrite(HOST_CR4, get_cr4());
+ vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE));
+ vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE));
+ vmwrite(HOST_TR_BASE,
+ get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr())));
+ vmwrite(HOST_GDTR_BASE, get_gdt_base());
+ vmwrite(HOST_IDTR_BASE, get_idt_base());
+ vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP));
+ vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP));
+}
+
+/*
+ * Initialize the guest state fields essentially as a clone of
+ * the host state fields. Some host state fields have fixed
+ * values, and we set the corresponding guest state fields accordingly.
+ */
+static inline void init_vmcs_guest_state(void *rip, void *rsp)
+{
+ vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR));
+ vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR));
+ vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR));
+ vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR));
+ vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR));
+ vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR));
+ vmwrite(GUEST_LDTR_SELECTOR, 0);
+ vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR));
+ vmwrite(GUEST_INTR_STATUS, 0);
+ vmwrite(GUEST_PML_INDEX, 0);
+
+ vmwrite(VMCS_LINK_POINTER, -1ll);
+ vmwrite(GUEST_IA32_DEBUGCTL, 0);
+ vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT));
+ vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER));
+ vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL,
+ vmreadz(HOST_IA32_PERF_GLOBAL_CTRL));
+
+ vmwrite(GUEST_ES_LIMIT, -1);
+ vmwrite(GUEST_CS_LIMIT, -1);
+ vmwrite(GUEST_SS_LIMIT, -1);
+ vmwrite(GUEST_DS_LIMIT, -1);
+ vmwrite(GUEST_FS_LIMIT, -1);
+ vmwrite(GUEST_GS_LIMIT, -1);
+ vmwrite(GUEST_LDTR_LIMIT, -1);
+ vmwrite(GUEST_TR_LIMIT, 0x67);
+ vmwrite(GUEST_GDTR_LIMIT, 0xffff);
+ vmwrite(GUEST_IDTR_LIMIT, 0xffff);
+ vmwrite(GUEST_ES_AR_BYTES,
+ vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
+ vmwrite(GUEST_SS_AR_BYTES, 0xc093);
+ vmwrite(GUEST_DS_AR_BYTES,
+ vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_FS_AR_BYTES,
+ vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_GS_AR_BYTES,
+ vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_LDTR_AR_BYTES, 0x10000);
+ vmwrite(GUEST_TR_AR_BYTES, 0x8b);
+ vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+ vmwrite(GUEST_ACTIVITY_STATE, 0);
+ vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS));
+ vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0);
+
+ vmwrite(GUEST_CR0, vmreadz(HOST_CR0));
+ vmwrite(GUEST_CR3, vmreadz(HOST_CR3));
+ vmwrite(GUEST_CR4, vmreadz(HOST_CR4));
+ vmwrite(GUEST_ES_BASE, 0);
+ vmwrite(GUEST_CS_BASE, 0);
+ vmwrite(GUEST_SS_BASE, 0);
+ vmwrite(GUEST_DS_BASE, 0);
+ vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE));
+ vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE));
+ vmwrite(GUEST_LDTR_BASE, 0);
+ vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE));
+ vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE));
+ vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE));
+ vmwrite(GUEST_DR7, 0x400);
+ vmwrite(GUEST_RSP, (uint64_t)rsp);
+ vmwrite(GUEST_RIP, (uint64_t)rip);
+ vmwrite(GUEST_RFLAGS, 2);
+ vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+ vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP));
+ vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
+}
+
+void prepare_vmcs(void *guest_rip, void *guest_rsp)
+{
+ init_vmcs_control_fields();
+ init_vmcs_host_state();
+ init_vmcs_guest_state(guest_rip, guest_rsp);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86.c
new file mode 100644
index 000000000000..2f17675f4275
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86.c
@@ -0,0 +1,700 @@
+/*
+ * tools/testing/selftests/kvm/lib/x86.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+#include "x86.h"
+
+/* Minimum physical address used for virtual translation tables. */
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+/* Virtual translation table structure declarations */
+struct pageMapL4Entry {
+ uint64_t present:1;
+ uint64_t writable:1;
+ uint64_t user:1;
+ uint64_t write_through:1;
+ uint64_t cache_disable:1;
+ uint64_t accessed:1;
+ uint64_t ignored_06:1;
+ uint64_t page_size:1;
+ uint64_t ignored_11_08:4;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t execute_disable:1;
+};
+
+struct pageDirectoryPointerEntry {
+ uint64_t present:1;
+ uint64_t writable:1;
+ uint64_t user:1;
+ uint64_t write_through:1;
+ uint64_t cache_disable:1;
+ uint64_t accessed:1;
+ uint64_t ignored_06:1;
+ uint64_t page_size:1;
+ uint64_t ignored_11_08:4;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t execute_disable:1;
+};
+
+struct pageDirectoryEntry {
+ uint64_t present:1;
+ uint64_t writable:1;
+ uint64_t user:1;
+ uint64_t write_through:1;
+ uint64_t cache_disable:1;
+ uint64_t accessed:1;
+ uint64_t ignored_06:1;
+ uint64_t page_size:1;
+ uint64_t ignored_11_08:4;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t execute_disable:1;
+};
+
+struct pageTableEntry {
+ uint64_t present:1;
+ uint64_t writable:1;
+ uint64_t user:1;
+ uint64_t write_through:1;
+ uint64_t cache_disable:1;
+ uint64_t accessed:1;
+ uint64_t dirty:1;
+ uint64_t reserved_07:1;
+ uint64_t global:1;
+ uint64_t ignored_11_09:3;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t execute_disable:1;
+};
+
+/* Register Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * regs - register
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the registers given by regs, to the FILE stream
+ * given by steam.
+ */
+void regs_dump(FILE *stream, struct kvm_regs *regs,
+ uint8_t indent)
+{
+ fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
+ "rcx: 0x%.16llx rdx: 0x%.16llx\n",
+ indent, "",
+ regs->rax, regs->rbx, regs->rcx, regs->rdx);
+ fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
+ "rsp: 0x%.16llx rbp: 0x%.16llx\n",
+ indent, "",
+ regs->rsi, regs->rdi, regs->rsp, regs->rbp);
+ fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx "
+ "r10: 0x%.16llx r11: 0x%.16llx\n",
+ indent, "",
+ regs->r8, regs->r9, regs->r10, regs->r11);
+ fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
+ "r14: 0x%.16llx r15: 0x%.16llx\n",
+ indent, "",
+ regs->r12, regs->r13, regs->r14, regs->r15);
+ fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
+ indent, "",
+ regs->rip, regs->rflags);
+}
+
+/* Segment Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * segment - KVM segment
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the KVM segment given by segment, to the FILE stream
+ * given by steam.
+ */
+static void segment_dump(FILE *stream, struct kvm_segment *segment,
+ uint8_t indent)
+{
+ fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
+ "selector: 0x%.4x type: 0x%.2x\n",
+ indent, "", segment->base, segment->limit,
+ segment->selector, segment->type);
+ fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
+ "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
+ indent, "", segment->present, segment->dpl,
+ segment->db, segment->s, segment->l);
+ fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
+ "unusable: 0x%.2x padding: 0x%.2x\n",
+ indent, "", segment->g, segment->avl,
+ segment->unusable, segment->padding);
+}
+
+/* dtable Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * dtable - KVM dtable
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the KVM dtable given by dtable, to the FILE stream
+ * given by steam.
+ */
+static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
+ uint8_t indent)
+{
+ fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
+ "padding: 0x%.4x 0x%.4x 0x%.4x\n",
+ indent, "", dtable->base, dtable->limit,
+ dtable->padding[0], dtable->padding[1], dtable->padding[2]);
+}
+
+/* System Register Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * sregs - System registers
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the system registers given by sregs, to the FILE stream
+ * given by steam.
+ */
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
+ uint8_t indent)
+{
+ unsigned int i;
+
+ fprintf(stream, "%*scs:\n", indent, "");
+ segment_dump(stream, &sregs->cs, indent + 2);
+ fprintf(stream, "%*sds:\n", indent, "");
+ segment_dump(stream, &sregs->ds, indent + 2);
+ fprintf(stream, "%*ses:\n", indent, "");
+ segment_dump(stream, &sregs->es, indent + 2);
+ fprintf(stream, "%*sfs:\n", indent, "");
+ segment_dump(stream, &sregs->fs, indent + 2);
+ fprintf(stream, "%*sgs:\n", indent, "");
+ segment_dump(stream, &sregs->gs, indent + 2);
+ fprintf(stream, "%*sss:\n", indent, "");
+ segment_dump(stream, &sregs->ss, indent + 2);
+ fprintf(stream, "%*str:\n", indent, "");
+ segment_dump(stream, &sregs->tr, indent + 2);
+ fprintf(stream, "%*sldt:\n", indent, "");
+ segment_dump(stream, &sregs->ldt, indent + 2);
+
+ fprintf(stream, "%*sgdt:\n", indent, "");
+ dtable_dump(stream, &sregs->gdt, indent + 2);
+ fprintf(stream, "%*sidt:\n", indent, "");
+ dtable_dump(stream, &sregs->idt, indent + 2);
+
+ fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
+ "cr3: 0x%.16llx cr4: 0x%.16llx\n",
+ indent, "",
+ sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
+ fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
+ "apic_base: 0x%.16llx\n",
+ indent, "",
+ sregs->cr8, sregs->efer, sregs->apic_base);
+
+ fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
+ for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
+ fprintf(stream, "%*s%.16llx\n", indent + 2, "",
+ sregs->interrupt_bitmap[i]);
+ }
+}
+
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+{
+ int rc;
+
+ TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ /* If needed, create page map l4 table. */
+ if (!vm->pgd_created) {
+ vm_paddr_t paddr = vm_phy_page_alloc(vm,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+ vm->pgd = paddr;
+
+ /* Set pointer to pgd tables in all the VCPUs that
+ * have already been created. Future VCPUs will have
+ * the value set as each one is created.
+ */
+ for (struct vcpu *vcpu = vm->vcpu_head; vcpu;
+ vcpu = vcpu->next) {
+ struct kvm_sregs sregs;
+
+ /* Obtain the current system register settings */
+ vcpu_sregs_get(vm, vcpu->id, &sregs);
+
+ /* Set and store the pointer to the start of the
+ * pgd tables.
+ */
+ sregs.cr3 = vm->pgd;
+ vcpu_sregs_set(vm, vcpu->id, &sregs);
+ }
+
+ vm->pgd_created = true;
+ }
+}
+
+/* VM Virtual Page Map
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vaddr - VM Virtual Address
+ * paddr - VM Physical Address
+ * pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a virtual translation for the page
+ * starting at vaddr to the page starting at paddr.
+ */
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint32_t pgd_memslot)
+{
+ uint16_t index[4];
+ struct pageMapL4Entry *pml4e;
+
+ TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ " vaddr: 0x%lx vm->page_size: 0x%x",
+ vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx",
+ vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " paddr: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ index[0] = (vaddr >> 12) & 0x1ffu;
+ index[1] = (vaddr >> 21) & 0x1ffu;
+ index[2] = (vaddr >> 30) & 0x1ffu;
+ index[3] = (vaddr >> 39) & 0x1ffu;
+
+ /* Allocate page directory pointer table if not present. */
+ pml4e = addr_gpa2hva(vm, vm->pgd);
+ if (!pml4e[index[3]].present) {
+ pml4e[index[3]].address = vm_phy_page_alloc(vm,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+ >> vm->page_shift;
+ pml4e[index[3]].writable = true;
+ pml4e[index[3]].present = true;
+ }
+
+ /* Allocate page directory table if not present. */
+ struct pageDirectoryPointerEntry *pdpe;
+ pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+ if (!pdpe[index[2]].present) {
+ pdpe[index[2]].address = vm_phy_page_alloc(vm,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+ >> vm->page_shift;
+ pdpe[index[2]].writable = true;
+ pdpe[index[2]].present = true;
+ }
+
+ /* Allocate page table if not present. */
+ struct pageDirectoryEntry *pde;
+ pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+ if (!pde[index[1]].present) {
+ pde[index[1]].address = vm_phy_page_alloc(vm,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+ >> vm->page_shift;
+ pde[index[1]].writable = true;
+ pde[index[1]].present = true;
+ }
+
+ /* Fill in page table entry. */
+ struct pageTableEntry *pte;
+ pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+ pte[index[0]].address = paddr >> vm->page_shift;
+ pte[index[0]].writable = true;
+ pte[index[0]].present = 1;
+}
+
+/* Virtual Translation Tables Dump
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * indent - Left margin indent amount
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps to the FILE stream given by stream, the contents of all the
+ * virtual translation tables for the VM given by vm.
+ */
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ struct pageMapL4Entry *pml4e, *pml4e_start;
+ struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
+ struct pageDirectoryEntry *pde, *pde_start;
+ struct pageTableEntry *pte, *pte_start;
+
+ if (!vm->pgd_created)
+ return;
+
+ fprintf(stream, "%*s "
+ " no\n", indent, "");
+ fprintf(stream, "%*s index hvaddr gpaddr "
+ "addr w exec dirty\n",
+ indent, "");
+ pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
+ vm->pgd);
+ for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
+ pml4e = &pml4e_start[n1];
+ if (!pml4e->present)
+ continue;
+ fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
+ " %u\n",
+ indent, "",
+ pml4e - pml4e_start, pml4e,
+ addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
+ pml4e->writable, pml4e->execute_disable);
+
+ pdpe_start = addr_gpa2hva(vm, pml4e->address
+ * vm->page_size);
+ for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
+ pdpe = &pdpe_start[n2];
+ if (!pdpe->present)
+ continue;
+ fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx "
+ "%u %u\n",
+ indent, "",
+ pdpe - pdpe_start, pdpe,
+ addr_hva2gpa(vm, pdpe),
+ (uint64_t) pdpe->address, pdpe->writable,
+ pdpe->execute_disable);
+
+ pde_start = addr_gpa2hva(vm,
+ pdpe->address * vm->page_size);
+ for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
+ pde = &pde_start[n3];
+ if (!pde->present)
+ continue;
+ fprintf(stream, "%*spde 0x%-3zx %p "
+ "0x%-12lx 0x%-10lx %u %u\n",
+ indent, "", pde - pde_start, pde,
+ addr_hva2gpa(vm, pde),
+ (uint64_t) pde->address, pde->writable,
+ pde->execute_disable);
+
+ pte_start = addr_gpa2hva(vm,
+ pde->address * vm->page_size);
+ for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
+ pte = &pte_start[n4];
+ if (!pte->present)
+ continue;
+ fprintf(stream, "%*spte 0x%-3zx %p "
+ "0x%-12lx 0x%-10lx %u %u "
+ " %u 0x%-10lx\n",
+ indent, "",
+ pte - pte_start, pte,
+ addr_hva2gpa(vm, pte),
+ (uint64_t) pte->address,
+ pte->writable,
+ pte->execute_disable,
+ pte->dirty,
+ ((uint64_t) n1 << 27)
+ | ((uint64_t) n2 << 18)
+ | ((uint64_t) n3 << 9)
+ | ((uint64_t) n4));
+ }
+ }
+ }
+ }
+}
+
+/* Set Unusable Segment
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ * segp - Pointer to segment register
+ *
+ * Return: None
+ *
+ * Sets the segment register pointed to by segp to an unusable state.
+ */
+static void kvm_seg_set_unusable(struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->unusable = true;
+}
+
+/* Set Long Mode Flat Kernel Code Segment
+ *
+ * Input Args:
+ * selector - selector value
+ *
+ * Output Args:
+ * segp - Pointer to KVM segment
+ *
+ * Return: None
+ *
+ * Sets up the KVM segment pointed to by segp, to be a code segment
+ * with the selector value given by selector.
+ */
+static void kvm_seg_set_kernel_code_64bit(uint16_t selector,
+ struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->selector = selector;
+ segp->limit = 0xFFFFFFFFu;
+ segp->s = 0x1; /* kTypeCodeData */
+ segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
+ * | kFlagCodeReadable
+ */
+ segp->g = true;
+ segp->l = true;
+ segp->present = 1;
+}
+
+/* Set Long Mode Flat Kernel Data Segment
+ *
+ * Input Args:
+ * selector - selector value
+ *
+ * Output Args:
+ * segp - Pointer to KVM segment
+ *
+ * Return: None
+ *
+ * Sets up the KVM segment pointed to by segp, to be a data segment
+ * with the selector value given by selector.
+ */
+static void kvm_seg_set_kernel_data_64bit(uint16_t selector,
+ struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->selector = selector;
+ segp->limit = 0xFFFFFFFFu;
+ segp->s = 0x1; /* kTypeCodeData */
+ segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
+ * | kFlagDataWritable
+ */
+ segp->g = true;
+ segp->present = true;
+}
+
+/* Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gpa - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Translates the VM virtual address given by gva to a VM physical
+ * address and then locates the memory region containing the VM
+ * physical address, within the VM given by vm. When found, the host
+ * virtual address providing the memory to the vm physical address is returned.
+ * A TEST_ASSERT failure occurs if no region containing translated
+ * VM virtual address exists.
+ */
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint16_t index[4];
+ struct pageMapL4Entry *pml4e;
+ struct pageDirectoryPointerEntry *pdpe;
+ struct pageDirectoryEntry *pde;
+ struct pageTableEntry *pte;
+ void *hva;
+
+ TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ index[0] = (gva >> 12) & 0x1ffu;
+ index[1] = (gva >> 21) & 0x1ffu;
+ index[2] = (gva >> 30) & 0x1ffu;
+ index[3] = (gva >> 39) & 0x1ffu;
+
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+ pml4e = addr_gpa2hva(vm, vm->pgd);
+ if (!pml4e[index[3]].present)
+ goto unmapped_gva;
+
+ pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+ if (!pdpe[index[2]].present)
+ goto unmapped_gva;
+
+ pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+ if (!pde[index[1]].present)
+ goto unmapped_gva;
+
+ pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+ if (!pte[index[0]].present)
+ goto unmapped_gva;
+
+ return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
+
+unmapped_gva:
+ TEST_ASSERT(false, "No mapping for vm virtual address, "
+ "gva: 0x%lx", gva);
+}
+
+void vcpu_setup(struct kvm_vm *vm, int vcpuid)
+{
+ struct kvm_sregs sregs;
+
+ /* Set mode specific system register values. */
+ vcpu_sregs_get(vm, vcpuid, &sregs);
+
+ switch (vm->mode) {
+ case VM_MODE_FLAT48PG:
+ sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
+ sregs.cr4 |= X86_CR4_PAE;
+ sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
+
+ kvm_seg_set_unusable(&sregs.ldt);
+ kvm_seg_set_kernel_code_64bit(0x8, &sregs.cs);
+ kvm_seg_set_kernel_data_64bit(0x10, &sregs.ds);
+ kvm_seg_set_kernel_data_64bit(0x10, &sregs.es);
+ break;
+
+ default:
+ TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+ vcpu_sregs_set(vm, vcpuid, &sregs);
+
+ /* If virtual translation table have been setup, set system register
+ * to point to the tables. It's okay if they haven't been setup yet,
+ * in that the code that sets up the virtual translation tables, will
+ * go back through any VCPUs that have already been created and set
+ * their values.
+ */
+ if (vm->pgd_created) {
+ struct kvm_sregs sregs;
+
+ vcpu_sregs_get(vm, vcpuid, &sregs);
+
+ sregs.cr3 = vm->pgd;
+ vcpu_sregs_set(vm, vcpuid, &sregs);
+ }
+}
+/* Adds a vCPU with reasonable defaults (i.e., a stack)
+ *
+ * Input Args:
+ * vcpuid - The id of the VCPU to add to the VM.
+ * guest_code - The vCPU's entry point
+ */
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+ struct kvm_mp_state mp_state;
+ struct kvm_regs regs;
+ vm_vaddr_t stack_vaddr;
+ stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
+ DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+
+ /* Create VCPU */
+ vm_vcpu_add(vm, vcpuid);
+
+ /* Setup guest general purpose registers */
+ vcpu_regs_get(vm, vcpuid, &regs);
+ regs.rflags = regs.rflags | 0x2;
+ regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
+ regs.rip = (unsigned long) guest_code;
+ vcpu_regs_set(vm, vcpuid, &regs);
+
+ /* Setup the MP state */
+ mp_state.mp_state = 0;
+ vcpu_set_mp_state(vm, vcpuid, &mp_state);
+}
+
+/* VM VCPU CPUID Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU id
+ * cpuid - The CPUID values to set.
+ *
+ * Output Args: None
+ *
+ * Return: void
+ *
+ * Set the VCPU's CPUID.
+ */
+void vcpu_set_cpuid(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int rc;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
+ TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
+ rc, errno);
+
+}
+/* Create a VM with reasonable defaults
+ *
+ * Input Args:
+ * vcpuid - The id of the single VCPU to add to the VM.
+ * guest_code - The vCPU's entry point
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ */
+struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code)
+{
+ struct kvm_vm *vm;
+
+ /* Create VM */
+ vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+
+ /* Setup guest code */
+ kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+
+ /* Setup IRQ Chip */
+ vm_create_irqchip(vm);
+
+ /* Add the first vCPU. */
+ vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+ return vm;
+}
diff --git a/tools/testing/selftests/kvm/set_sregs_test.c b/tools/testing/selftests/kvm/set_sregs_test.c
new file mode 100644
index 000000000000..090fd3f19352
--- /dev/null
+++ b/tools/testing/selftests/kvm/set_sregs_test.c
@@ -0,0 +1,54 @@
+/*
+ * KVM_SET_SREGS tests
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * This is a regression test for the bug fixed by the following commit:
+ * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
+ *
+ * That bug allowed a user-mode program that called the KVM_SET_SREGS
+ * ioctl to put a VCPU's local APIC into an invalid state.
+ *
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "x86.h"
+
+#define VCPU_ID 5
+
+int main(int argc, char *argv[])
+{
+ struct kvm_sregs sregs;
+ struct kvm_vm *vm;
+ int rc;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, NULL);
+
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ sregs.apic_base = 1 << 10;
+ rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
+ sregs.apic_base);
+ sregs.apic_base = 1 << 11;
+ rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
+ sregs.apic_base);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/sync_regs_test.c
new file mode 100644
index 000000000000..428e9473f5e2
--- /dev/null
+++ b/tools/testing/selftests/kvm/sync_regs_test.c
@@ -0,0 +1,232 @@
+/*
+ * Test for x86 KVM_CAP_SYNC_REGS
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality,
+ * including requesting an invalid register set, updates to/from values
+ * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+
+#define VCPU_ID 5
+#define PORT_HOST_SYNC 0x1000
+
+static void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1)
+{
+ __asm__ __volatile__("in %[port], %%al"
+ :
+ : [port]"d"(port), "D"(arg0), "S"(arg1)
+ : "rax");
+}
+
+#define exit_to_l0(_port, _arg0, _arg1) \
+ __exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1))
+
+#define GUEST_ASSERT(_condition) do { \
+ if (!(_condition)) \
+ exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition, 0);\
+} while (0)
+
+void guest_code(void)
+{
+ for (;;) {
+ exit_to_l0(PORT_HOST_SYNC, "hello", 0);
+ asm volatile ("inc %r11");
+ }
+}
+
+static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
+{
+#define REG_COMPARE(reg) \
+ TEST_ASSERT(left->reg == right->reg, \
+ "Register " #reg \
+ " values did not match: 0x%llx, 0x%llx\n", \
+ left->reg, right->reg)
+ REG_COMPARE(rax);
+ REG_COMPARE(rbx);
+ REG_COMPARE(rcx);
+ REG_COMPARE(rdx);
+ REG_COMPARE(rsi);
+ REG_COMPARE(rdi);
+ REG_COMPARE(rsp);
+ REG_COMPARE(rbp);
+ REG_COMPARE(r8);
+ REG_COMPARE(r9);
+ REG_COMPARE(r10);
+ REG_COMPARE(r11);
+ REG_COMPARE(r12);
+ REG_COMPARE(r13);
+ REG_COMPARE(r14);
+ REG_COMPARE(r15);
+ REG_COMPARE(rip);
+ REG_COMPARE(rflags);
+#undef REG_COMPARE
+}
+
+static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right)
+{
+}
+
+static void compare_vcpu_events(struct kvm_vcpu_events *left,
+ struct kvm_vcpu_events *right)
+{
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+ struct kvm_vcpu_events events;
+ int rv, cap;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+ TEST_ASSERT((unsigned long)cap == KVM_SYNC_X86_VALID_FIELDS,
+ "KVM_CAP_SYNC_REGS (0x%x) != KVM_SYNC_X86_VALID_FIELDS (0x%lx)\n",
+ cap, KVM_SYNC_X86_VALID_FIELDS);
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, guest_code);
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ /* Request reading invalid register set from VCPU. */
+ run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS << 1;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+ /* Request setting invalid register set into VCPU. */
+ run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS << 1;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+ /* Request and verify all valid register sets. */
+ /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
+ run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ compare_regs(&regs, &run->s.regs.regs);
+
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ compare_sregs(&sregs, &run->s.regs.sregs);
+
+ vcpu_events_get(vm, VCPU_ID, &events);
+ compare_vcpu_events(&events, &run->s.regs.events);
+
+ /* Set and verify various register values. */
+ run->s.regs.regs.r11 = 0xBAD1DEA;
+ run->s.regs.sregs.apic_base = 1 << 11;
+ /* TODO run->s.regs.events.XYZ = ABC; */
+
+ run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS;
+ run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.regs.r11 == 0xBAD1DEA + 1,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.r11);
+ TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11,
+ "apic_base sync regs value incorrect 0x%llx.",
+ run->s.regs.sregs.apic_base);
+
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ compare_regs(&regs, &run->s.regs.regs);
+
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ compare_sregs(&sregs, &run->s.regs.sregs);
+
+ vcpu_events_get(vm, VCPU_ID, &events);
+ compare_vcpu_events(&events, &run->s.regs.events);
+
+ /* Clear kvm_dirty_regs bits, verify new s.regs values are
+ * overwritten with existing guest values.
+ */
+ run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS;
+ run->kvm_dirty_regs = 0;
+ run->s.regs.regs.r11 = 0xDEADBEEF;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.regs.r11 != 0xDEADBEEF,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.r11);
+
+ /* Clear kvm_valid_regs bits and kvm_dirty_bits.
+ * Verify s.regs values are not overwritten with existing guest values
+ * and that guest values are not overwritten with kvm_sync_regs values.
+ */
+ run->kvm_valid_regs = 0;
+ run->kvm_dirty_regs = 0;
+ run->s.regs.regs.r11 = 0xAAAA;
+ regs.r11 = 0xBAC0;
+ vcpu_regs_set(vm, VCPU_ID, &regs);
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.regs.r11 == 0xAAAA,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.r11);
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ TEST_ASSERT(regs.r11 == 0xBAC0 + 1,
+ "r11 guest value incorrect 0x%llx.",
+ regs.r11);
+
+ /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
+ * with existing guest values but that guest values are overwritten
+ * with kvm_sync_regs values.
+ */
+ run->kvm_valid_regs = 0;
+ run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS;
+ run->s.regs.regs.r11 = 0xBBBB;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.regs.r11 == 0xBBBB,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.r11);
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ TEST_ASSERT(regs.r11 == 0xBBBB + 1,
+ "r11 guest value incorrect 0x%llx.",
+ regs.r11);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
new file mode 100644
index 000000000000..8f7f62093add
--- /dev/null
+++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
@@ -0,0 +1,231 @@
+/*
+ * gtests/tests/vmx_tsc_adjust_test.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ *
+ * IA32_TSC_ADJUST test
+ *
+ * According to the SDM, "if an execution of WRMSR to the
+ * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC,
+ * the logical processor also adds (or subtracts) value X from the
+ * IA32_TSC_ADJUST MSR.
+ *
+ * Note that when L1 doesn't intercept writes to IA32_TSC, a
+ * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC
+ * value.
+ *
+ * This test verifies that this unusual case is handled correctly.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#ifndef MSR_IA32_TSC_ADJUST
+#define MSR_IA32_TSC_ADJUST 0x3b
+#endif
+
+#define PAGE_SIZE 4096
+#define VCPU_ID 5
+
+#define TSC_ADJUST_VALUE (1ll << 32)
+#define TSC_OFFSET_VALUE -(1ll << 48)
+
+enum {
+ PORT_ABORT = 0x1000,
+ PORT_REPORT,
+ PORT_DONE,
+};
+
+struct vmx_page {
+ vm_vaddr_t virt;
+ vm_paddr_t phys;
+};
+
+enum {
+ VMXON_PAGE = 0,
+ VMCS_PAGE,
+ MSR_BITMAP_PAGE,
+
+ NUM_VMX_PAGES,
+};
+
+struct kvm_single_msr {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+} __attribute__((packed));
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+/* Array of vmx_page descriptors that is shared with the guest. */
+struct vmx_page *vmx_pages;
+
+#define exit_to_l0(_port, _arg) do_exit_to_l0(_port, (unsigned long) (_arg))
+static void do_exit_to_l0(uint16_t port, unsigned long arg)
+{
+ __asm__ __volatile__("in %[port], %%al"
+ :
+ : [port]"d"(port), "D"(arg)
+ : "rax");
+}
+
+
+#define GUEST_ASSERT(_condition) do { \
+ if (!(_condition)) \
+ exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition); \
+} while (0)
+
+static void check_ia32_tsc_adjust(int64_t max)
+{
+ int64_t adjust;
+
+ adjust = rdmsr(MSR_IA32_TSC_ADJUST);
+ exit_to_l0(PORT_REPORT, adjust);
+ GUEST_ASSERT(adjust <= max);
+}
+
+static void l2_guest_code(void)
+{
+ uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE;
+
+ wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE);
+ check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+ /* Exit to L1 */
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_page *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+ uintptr_t save_cr3;
+
+ GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
+ wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
+ check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+
+ prepare_for_vmx_operation();
+
+ /* Enter VMX root operation. */
+ *(uint32_t *)vmx_pages[VMXON_PAGE].virt = vmcs_revision();
+ GUEST_ASSERT(!vmxon(vmx_pages[VMXON_PAGE].phys));
+
+ /* Load a VMCS. */
+ *(uint32_t *)vmx_pages[VMCS_PAGE].virt = vmcs_revision();
+ GUEST_ASSERT(!vmclear(vmx_pages[VMCS_PAGE].phys));
+ GUEST_ASSERT(!vmptrld(vmx_pages[VMCS_PAGE].phys));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+ vmwrite(MSR_BITMAP, vmx_pages[MSR_BITMAP_PAGE].phys);
+ vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+ /* Jump into L2. First, test failure to load guest CR3. */
+ save_cr3 = vmreadz(GUEST_CR3);
+ vmwrite(GUEST_CR3, -1ull);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
+ (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
+ check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+ vmwrite(GUEST_CR3, save_cr3);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+ exit_to_l0(PORT_DONE, 0);
+}
+
+static void allocate_vmx_page(struct vmx_page *page)
+{
+ vm_vaddr_t virt;
+
+ virt = vm_vaddr_alloc(vm, PAGE_SIZE, 0, 0, 0);
+ memset(addr_gva2hva(vm, virt), 0, PAGE_SIZE);
+
+ page->virt = virt;
+ page->phys = addr_gva2gpa(vm, virt);
+}
+
+static vm_vaddr_t allocate_vmx_pages(void)
+{
+ vm_vaddr_t vmx_pages_vaddr;
+ int i;
+
+ vmx_pages_vaddr = vm_vaddr_alloc(
+ vm, sizeof(struct vmx_page) * NUM_VMX_PAGES, 0, 0, 0);
+
+ vmx_pages = (void *) addr_gva2hva(vm, vmx_pages_vaddr);
+
+ for (i = 0; i < NUM_VMX_PAGES; i++)
+ allocate_vmx_page(&vmx_pages[i]);
+
+ return vmx_pages_vaddr;
+}
+
+void report(int64_t val)
+{
+ printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
+ val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_vaddr;
+ struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+ if (!(entry->ecx & CPUID_VMX)) {
+ printf("nested VMX not enabled, skipping test");
+ return 0;
+ }
+
+ vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code);
+
+ /* Allocate VMX pages and shared descriptors (vmx_pages). */
+ vmx_pages_vaddr = allocate_vmx_pages();
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_vaddr);
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_regs regs;
+
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+
+ switch (run->io.port) {
+ case PORT_ABORT:
+ TEST_ASSERT(false, "%s", (const char *) regs.rdi);
+ /* NOT REACHED */
+ case PORT_REPORT:
+ report(regs.rdi);
+ break;
+ case PORT_DONE:
+ goto done;
+ default:
+ TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port);
+ }
+ }
+
+ kvm_vm_free(vm);
+done:
+ return 0;
+}
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 7de482a0519d..c1b1a4dc6a96 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -20,9 +20,10 @@ all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
.ONESHELL:
define RUN_TESTS
- @test_num=`echo 0`;
- @echo "TAP version 13";
- @for TEST in $(1); do \
+ @export KSFT_TAP_LEVEL=`echo 1`; \
+ test_num=`echo 0`; \
+ echo "TAP version 13"; \
+ for TEST in $(1); do \
BASENAME_TEST=`basename $$TEST`; \
test_num=`echo $$test_num+1 | bc`; \
echo "selftests: $$BASENAME_TEST"; \
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 785fc18a16b4..3ff81a478dbe 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -6,6 +6,7 @@ CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh
+TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
diff --git a/tools/testing/selftests/powerpc/benchmarks/.gitignore b/tools/testing/selftests/powerpc/benchmarks/.gitignore
index 04dc1e6ef2ce..9161679b1e1a 100644
--- a/tools/testing/selftests/powerpc/benchmarks/.gitignore
+++ b/tools/testing/selftests/powerpc/benchmarks/.gitignore
@@ -1,5 +1,7 @@
gettimeofday
context_switch
+fork
+exec_target
mmap_bench
futex_bench
null_syscall
diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile
index a35058e3766c..b4d7432a0ecd 100644
--- a/tools/testing/selftests/powerpc/benchmarks/Makefile
+++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := gettimeofday context_switch mmap_bench futex_bench null_syscall
+TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench null_syscall
+TEST_GEN_FILES := exec_target
CFLAGS += -O2
@@ -10,3 +11,7 @@ $(TEST_GEN_PROGS): ../harness.c
$(OUTPUT)/context_switch: ../utils.c
$(OUTPUT)/context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec
$(OUTPUT)/context_switch: LDLIBS += -lpthread
+
+$(OUTPUT)/fork: LDLIBS += -lpthread
+
+$(OUTPUT)/exec_target: CFLAGS += -static -nostartfiles
diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
new file mode 100644
index 000000000000..3c9c144192be
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Part of fork context switch microbenchmark.
+ *
+ * Copyright 2018, Anton Blanchard, IBM Corp.
+ */
+
+void _exit(int);
+void _start(void)
+{
+ _exit(0);
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/fork.c b/tools/testing/selftests/powerpc/benchmarks/fork.c
new file mode 100644
index 000000000000..d312e638cb37
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/fork.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Context switch microbenchmark.
+ *
+ * Copyright 2018, Anton Blanchard, IBM Corp.
+ */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/shm.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static unsigned int timeout = 30;
+
+static void set_cpu(int cpu)
+{
+ cpu_set_t cpuset;
+
+ if (cpu == -1)
+ return;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) {
+ perror("sched_setaffinity");
+ exit(1);
+ }
+}
+
+static void start_process_on(void *(*fn)(void *), void *arg, int cpu)
+{
+ int pid;
+
+ pid = fork();
+ if (pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+
+ if (pid)
+ return;
+
+ set_cpu(cpu);
+
+ fn(arg);
+
+ exit(0);
+}
+
+static int cpu;
+static int do_fork = 0;
+static int do_vfork = 0;
+static int do_exec = 0;
+static char *exec_file;
+static int exec_target = 0;
+static unsigned long iterations;
+static unsigned long iterations_prev;
+
+static void run_exec(void)
+{
+ char *const argv[] = { "./exec_target", NULL };
+
+ if (execve("./exec_target", argv, NULL) == -1) {
+ perror("execve");
+ exit(1);
+ }
+}
+
+static void bench_fork(void)
+{
+ while (1) {
+ pid_t pid = fork();
+ if (pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+ if (pid == 0) {
+ if (do_exec)
+ run_exec();
+ _exit(0);
+ }
+ pid = waitpid(pid, NULL, 0);
+ if (pid == -1) {
+ perror("waitpid");
+ exit(1);
+ }
+ iterations++;
+ }
+}
+
+static void bench_vfork(void)
+{
+ while (1) {
+ pid_t pid = vfork();
+ if (pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+ if (pid == 0) {
+ if (do_exec)
+ run_exec();
+ _exit(0);
+ }
+ pid = waitpid(pid, NULL, 0);
+ if (pid == -1) {
+ perror("waitpid");
+ exit(1);
+ }
+ iterations++;
+ }
+}
+
+static void *null_fn(void *arg)
+{
+ pthread_exit(NULL);
+}
+
+static void bench_thread(void)
+{
+ pthread_t tid;
+ cpu_set_t cpuset;
+ pthread_attr_t attr;
+ int rc;
+
+ rc = pthread_attr_init(&attr);
+ if (rc) {
+ errno = rc;
+ perror("pthread_attr_init");
+ exit(1);
+ }
+
+ if (cpu != -1) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+ if (rc) {
+ errno = rc;
+ perror("pthread_attr_setaffinity_np");
+ exit(1);
+ }
+ }
+
+ while (1) {
+ rc = pthread_create(&tid, &attr, null_fn, NULL);
+ if (rc) {
+ errno = rc;
+ perror("pthread_create");
+ exit(1);
+ }
+ rc = pthread_join(tid, NULL);
+ if (rc) {
+ errno = rc;
+ perror("pthread_join");
+ exit(1);
+ }
+ iterations++;
+ }
+}
+
+static void sigalrm_handler(int junk)
+{
+ unsigned long i = iterations;
+
+ printf("%ld\n", i - iterations_prev);
+ iterations_prev = i;
+
+ if (--timeout == 0)
+ kill(0, SIGUSR1);
+
+ alarm(1);
+}
+
+static void sigusr1_handler(int junk)
+{
+ exit(0);
+}
+
+static void *bench_proc(void *arg)
+{
+ signal(SIGALRM, sigalrm_handler);
+ alarm(1);
+
+ if (do_fork)
+ bench_fork();
+ else if (do_vfork)
+ bench_vfork();
+ else
+ bench_thread();
+
+ return NULL;
+}
+
+static struct option options[] = {
+ { "fork", no_argument, &do_fork, 1 },
+ { "vfork", no_argument, &do_vfork, 1 },
+ { "exec", no_argument, &do_exec, 1 },
+ { "timeout", required_argument, 0, 's' },
+ { "exec-target", no_argument, &exec_target, 1 },
+ { NULL },
+};
+
+static void usage(void)
+{
+ fprintf(stderr, "Usage: fork <options> CPU\n\n");
+ fprintf(stderr, "\t\t--fork\tUse fork() (default threads)\n");
+ fprintf(stderr, "\t\t--vfork\tUse vfork() (default threads)\n");
+ fprintf(stderr, "\t\t--exec\tAlso exec() (default no exec)\n");
+ fprintf(stderr, "\t\t--timeout=X\tDuration in seconds to run (default 30)\n");
+ fprintf(stderr, "\t\t--exec-target\tInternal option for exec workload\n");
+}
+
+int main(int argc, char *argv[])
+{
+ signed char c;
+
+ while (1) {
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "", options, &option_index);
+
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 0:
+ if (options[option_index].flag != 0)
+ break;
+
+ usage();
+ exit(1);
+ break;
+
+ case 's':
+ timeout = atoi(optarg);
+ break;
+
+ default:
+ usage();
+ exit(1);
+ }
+ }
+
+ if (do_fork && do_vfork) {
+ usage();
+ exit(1);
+ }
+ if (do_exec && !do_fork && !do_vfork) {
+ usage();
+ exit(1);
+ }
+
+ if (do_exec) {
+ char *dirname = strdup(argv[0]);
+ int i;
+ i = strlen(dirname) - 1;
+ while (i) {
+ if (dirname[i] == '/') {
+ dirname[i] = '\0';
+ if (chdir(dirname) == -1) {
+ perror("chdir");
+ exit(1);
+ }
+ break;
+ }
+ i--;
+ }
+ }
+
+ if (exec_target) {
+ exit(0);
+ }
+
+ if (((argc - optind) != 1)) {
+ cpu = -1;
+ } else {
+ cpu = atoi(argv[optind++]);
+ }
+
+ if (do_exec)
+ exec_file = argv[0];
+
+ set_cpu(cpu);
+
+ printf("Using ");
+ if (do_fork)
+ printf("fork");
+ else if (do_vfork)
+ printf("vfork");
+ else
+ printf("clone");
+
+ if (do_exec)
+ printf(" + exec");
+
+ printf(" on cpu %d\n", cpu);
+
+ /* Create a new process group so we can signal everyone for exit */
+ setpgid(getpid(), getpid());
+
+ signal(SIGUSR1, sigusr1_handler);
+
+ start_process_on(bench_proc, NULL, cpu);
+
+ while (1)
+ sleep(3600);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index ac4a52e19e59..eedce3366f64 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -5,8 +5,8 @@ CFLAGS += -I$(CURDIR)
CFLAGS += -D SELFTEST
CFLAGS += -maltivec
-# Use our CFLAGS for the implicit .S rule
-ASFLAGS = $(CFLAGS)
+# Use our CFLAGS for the implicit .S rule & set the asm machine type
+ASFLAGS = $(CFLAGS) -Wa,-mpower4
TEST_GEN_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
EXTRA_SOURCES := validate.c ../harness.c
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 5c72ff978f27..c0e45d2dde25 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -4,7 +4,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
- $(SIGNAL_CONTEXT_CHK_TESTS)
+ $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn
include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
new file mode 100644
index 000000000000..85d63449243b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2015, Laurent Dufour, IBM Corp.
+ *
+ * Test the kernel's signal returning code to check reclaim is done if the
+ * sigreturn() is called while in a transaction (suspended since active is
+ * already dropped trough the system call path).
+ *
+ * The kernel must discard the transaction when entering sigreturn, since
+ * restoring the potential TM SPRS from the signal frame is requiring to not be
+ * in a transaction.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "tm.h"
+#include "utils.h"
+
+
+void handler(int sig)
+{
+ uint64_t ret;
+
+ asm __volatile__(
+ "li 3,1 ;"
+ "tbegin. ;"
+ "beq 1f ;"
+ "li 3,0 ;"
+ "tsuspend. ;"
+ "1: ;"
+ "std%X[ret] 3, %[ret] ;"
+ : [ret] "=m"(ret)
+ :
+ : "memory", "3", "cr0");
+
+ if (ret)
+ exit(1);
+
+ /*
+ * We return from the signal handle while in a suspended transaction
+ */
+}
+
+
+int tm_sigreturn(void)
+{
+ struct sigaction sa;
+ uint64_t ret = 0;
+
+ SKIP_IF(!have_htm());
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = handler;
+ sigemptyset(&sa.sa_mask);
+
+ if (sigaction(SIGSEGV, &sa, NULL))
+ exit(1);
+
+ asm __volatile__(
+ "tbegin. ;"
+ "beq 1f ;"
+ "li 3,0 ;"
+ "std 3,0(3) ;" /* trigger SEGV */
+ "li 3,1 ;"
+ "std%X[ret] 3,%[ret] ;"
+ "tend. ;"
+ "b 2f ;"
+ "1: ;"
+ "li 3,2 ;"
+ "std%X[ret] 3,%[ret] ;"
+ "2: ;"
+ : [ret] "=m"(ret)
+ :
+ : "memory", "3", "cr0");
+
+ if (ret != 2)
+ exit(1);
+
+ exit(0);
+}
+
+int main(void)
+{
+ return test_harness(tm_sigreturn, "tm_sigreturn");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index e6a0fad2bfd0..156c8e750259 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -80,7 +80,7 @@ bool is_failure(uint64_t condition_reg)
return ((condition_reg >> 28) & 0xa) == 0xa;
}
-void *ping(void *input)
+void *tm_una_ping(void *input)
{
/*
@@ -280,7 +280,7 @@ void *ping(void *input)
}
/* Thread to force context switch */
-void *pong(void *not_used)
+void *tm_una_pong(void *not_used)
{
/* Wait thread get its name "pong". */
if (DEBUG)
@@ -311,11 +311,11 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
do {
int rc;
- /* Bind 'ping' to CPU 0, as specified in 'attr'. */
- rc = pthread_create(&t0, attr, ping, (void *) &flags);
+ /* Bind to CPU 0, as specified in 'attr'. */
+ rc = pthread_create(&t0, attr, tm_una_ping, (void *) &flags);
if (rc)
pr_err(rc, "pthread_create()");
- rc = pthread_setname_np(t0, "ping");
+ rc = pthread_setname_np(t0, "tm_una_ping");
if (rc)
pr_warn(rc, "pthread_setname_np");
rc = pthread_join(t0, &ret_value);
@@ -333,13 +333,15 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
}
}
-int main(int argc, char **argv)
+int tm_unavailable_test(void)
{
int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
pthread_t t1;
pthread_attr_t attr;
cpu_set_t cpuset;
+ SKIP_IF(!have_htm());
+
/* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */
CPU_ZERO(&cpuset);
CPU_SET(0, &cpuset);
@@ -354,12 +356,12 @@ int main(int argc, char **argv)
if (rc)
pr_err(rc, "pthread_attr_setaffinity_np()");
- rc = pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL);
+ rc = pthread_create(&t1, &attr /* Bind to CPU 0 */, tm_una_pong, NULL);
if (rc)
pr_err(rc, "pthread_create()");
/* Name it for systemtap convenience */
- rc = pthread_setname_np(t1, "pong");
+ rc = pthread_setname_np(t1, "tm_una_pong");
if (rc)
pr_warn(rc, "pthread_create()");
@@ -394,3 +396,9 @@ int main(int argc, char **argv)
exit(0);
}
}
+
+int main(int argc, char **argv)
+{
+ test_harness_set_timeout(220);
+ return test_harness(tm_unavailable_test, "tm_unavailable_test");
+}
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
new file mode 100644
index 000000000000..6c16f77c722c
--- /dev/null
+++ b/tools/testing/selftests/proc/.gitignore
@@ -0,0 +1,8 @@
+/proc-loadavg-001
+/proc-self-map-files-001
+/proc-self-map-files-002
+/proc-self-syscall
+/proc-self-wchan
+/proc-uptime-001
+/proc-uptime-002
+/read
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
new file mode 100644
index 000000000000..dbb87e56264c
--- /dev/null
+++ b/tools/testing/selftests/proc/Makefile
@@ -0,0 +1,13 @@
+CFLAGS += -Wall -O2
+
+TEST_GEN_PROGS :=
+TEST_GEN_PROGS += proc-loadavg-001
+TEST_GEN_PROGS += proc-self-map-files-001
+TEST_GEN_PROGS += proc-self-map-files-002
+TEST_GEN_PROGS += proc-self-syscall
+TEST_GEN_PROGS += proc-self-wchan
+TEST_GEN_PROGS += proc-uptime-001
+TEST_GEN_PROGS += proc-uptime-002
+TEST_GEN_PROGS += read
+
+include ../lib.mk
diff --git a/tools/testing/selftests/proc/config b/tools/testing/selftests/proc/config
new file mode 100644
index 000000000000..68fbd2b35884
--- /dev/null
+++ b/tools/testing/selftests/proc/config
@@ -0,0 +1 @@
+CONFIG_PROC_FS=y
diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c
new file mode 100644
index 000000000000..fcff7047000d
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-loadavg-001.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test that /proc/loadavg correctly reports last pid in pid namespace. */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+int main(void)
+{
+ pid_t pid;
+ int wstatus;
+
+ if (unshare(CLONE_NEWPID) == -1) {
+ if (errno == ENOSYS || errno == EPERM)
+ return 2;
+ return 1;
+ }
+
+ pid = fork();
+ if (pid == -1)
+ return 1;
+ if (pid == 0) {
+ char buf[128], *p;
+ int fd;
+ ssize_t rv;
+
+ fd = open("/proc/loadavg" , O_RDONLY);
+ if (fd == -1)
+ return 1;
+ rv = read(fd, buf, sizeof(buf));
+ if (rv < 3)
+ return 1;
+ p = buf + rv;
+
+ /* pid 1 */
+ if (!(p[-3] == ' ' && p[-2] == '1' && p[-1] == '\n'))
+ return 1;
+
+ pid = fork();
+ if (pid == -1)
+ return 1;
+ if (pid == 0)
+ return 0;
+ if (waitpid(pid, NULL, 0) == -1)
+ return 1;
+
+ lseek(fd, 0, SEEK_SET);
+ rv = read(fd, buf, sizeof(buf));
+ if (rv < 3)
+ return 1;
+ p = buf + rv;
+
+ /* pid 2 */
+ if (!(p[-3] == ' ' && p[-2] == '2' && p[-1] == '\n'))
+ return 1;
+
+ return 0;
+ }
+
+ if (waitpid(pid, &wstatus, 0) == -1)
+ return 1;
+ if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0)
+ return 0;
+ return 1;
+}
diff --git a/tools/testing/selftests/proc/proc-self-map-files-001.c b/tools/testing/selftests/proc/proc-self-map-files-001.c
new file mode 100644
index 000000000000..4209c64283d6
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-map-files-001.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test readlink /proc/self/map_files/... */
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+
+static void pass(const char *fmt, unsigned long a, unsigned long b)
+{
+ char name[64];
+ char buf[64];
+
+ snprintf(name, sizeof(name), fmt, a, b);
+ if (readlink(name, buf, sizeof(buf)) == -1)
+ exit(1);
+}
+
+static void fail(const char *fmt, unsigned long a, unsigned long b)
+{
+ char name[64];
+ char buf[64];
+
+ snprintf(name, sizeof(name), fmt, a, b);
+ if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT)
+ return;
+ exit(1);
+}
+
+int main(void)
+{
+ const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+ void *p;
+ int fd;
+ unsigned long a, b;
+
+ fd = open("/dev/zero", O_RDONLY);
+ if (fd == -1)
+ return 1;
+
+ p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE, fd, 0);
+ if (p == MAP_FAILED)
+ return 1;
+
+ a = (unsigned long)p;
+ b = (unsigned long)p + PAGE_SIZE;
+
+ pass("/proc/self/map_files/%lx-%lx", a, b);
+ fail("/proc/self/map_files/ %lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx -%lx", a, b);
+ fail("/proc/self/map_files/%lx- %lx", a, b);
+ fail("/proc/self/map_files/%lx-%lx ", a, b);
+ fail("/proc/self/map_files/0%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-0%lx", a, b);
+ if (sizeof(long) == 4) {
+ fail("/proc/self/map_files/100000000%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-100000000%lx", a, b);
+ } else if (sizeof(long) == 8) {
+ fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b);
+ } else
+ return 1;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c
new file mode 100644
index 000000000000..6f1f4a6e1ecb
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-map-files-002.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test readlink /proc/self/map_files/... with address 0. */
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+
+static void pass(const char *fmt, unsigned long a, unsigned long b)
+{
+ char name[64];
+ char buf[64];
+
+ snprintf(name, sizeof(name), fmt, a, b);
+ if (readlink(name, buf, sizeof(buf)) == -1)
+ exit(1);
+}
+
+static void fail(const char *fmt, unsigned long a, unsigned long b)
+{
+ char name[64];
+ char buf[64];
+
+ snprintf(name, sizeof(name), fmt, a, b);
+ if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT)
+ return;
+ exit(1);
+}
+
+int main(void)
+{
+ const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+ void *p;
+ int fd;
+ unsigned long a, b;
+
+ fd = open("/dev/zero", O_RDONLY);
+ if (fd == -1)
+ return 1;
+
+ p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
+ if (p == MAP_FAILED) {
+ if (errno == EPERM)
+ return 2;
+ return 1;
+ }
+
+ a = (unsigned long)p;
+ b = (unsigned long)p + PAGE_SIZE;
+
+ pass("/proc/self/map_files/%lx-%lx", a, b);
+ fail("/proc/self/map_files/ %lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx -%lx", a, b);
+ fail("/proc/self/map_files/%lx- %lx", a, b);
+ fail("/proc/self/map_files/%lx-%lx ", a, b);
+ fail("/proc/self/map_files/0%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-0%lx", a, b);
+ if (sizeof(long) == 4) {
+ fail("/proc/self/map_files/100000000%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-100000000%lx", a, b);
+ } else if (sizeof(long) == 8) {
+ fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b);
+ } else
+ return 1;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c
new file mode 100644
index 000000000000..5ab5f4810e43
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-syscall.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+
+static inline ssize_t sys_read(int fd, void *buf, size_t len)
+{
+ return syscall(SYS_read, fd, buf, len);
+}
+
+int main(void)
+{
+ char buf1[64];
+ char buf2[64];
+ int fd;
+ ssize_t rv;
+
+ fd = open("/proc/self/syscall", O_RDONLY);
+ if (fd == -1) {
+ if (errno == ENOENT)
+ return 2;
+ return 1;
+ }
+
+ /* Do direct system call as libc can wrap anything. */
+ snprintf(buf1, sizeof(buf1), "%ld 0x%lx 0x%lx 0x%lx",
+ (long)SYS_read, (long)fd, (long)buf2, (long)sizeof(buf2));
+
+ memset(buf2, 0, sizeof(buf2));
+ rv = sys_read(fd, buf2, sizeof(buf2));
+ if (rv < 0)
+ return 1;
+ if (rv < strlen(buf1))
+ return 1;
+ if (strncmp(buf1, buf2, strlen(buf1)) != 0)
+ return 1;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-wchan.c b/tools/testing/selftests/proc/proc-self-wchan.c
new file mode 100644
index 000000000000..a38b2fbaa7ad
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-wchan.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+
+int main(void)
+{
+ char buf[64];
+ int fd;
+
+ fd = open("/proc/self/wchan", O_RDONLY);
+ if (fd == -1) {
+ if (errno == ENOENT)
+ return 2;
+ return 1;
+ }
+
+ buf[0] = '\0';
+ if (read(fd, buf, sizeof(buf)) != 1)
+ return 1;
+ if (buf[0] != '0')
+ return 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime-001.c b/tools/testing/selftests/proc/proc-uptime-001.c
new file mode 100644
index 000000000000..781f7a50fc3f
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime-001.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that values in /proc/uptime increment monotonically.
+#undef NDEBUG
+#include <assert.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc-uptime.h"
+
+int main(void)
+{
+ uint64_t start, u0, u1, i0, i1;
+ int fd;
+
+ fd = open("/proc/uptime", O_RDONLY);
+ assert(fd >= 0);
+
+ proc_uptime(fd, &u0, &i0);
+ start = u0;
+ do {
+ proc_uptime(fd, &u1, &i1);
+ assert(u1 >= u0);
+ assert(i1 >= i0);
+ u0 = u1;
+ i0 = i1;
+ } while (u1 - start < 100);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c
new file mode 100644
index 000000000000..30e2b7849089
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime-002.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that values in /proc/uptime increment monotonically
+// while shifting across CPUs.
+#define _GNU_SOURCE
+#undef NDEBUG
+#include <assert.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc-uptime.h"
+
+static inline int sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long *m)
+{
+ return syscall(SYS_sched_getaffinity, pid, len, m);
+}
+
+static inline int sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long *m)
+{
+ return syscall(SYS_sched_setaffinity, pid, len, m);
+}
+
+int main(void)
+{
+ unsigned int len;
+ unsigned long *m;
+ unsigned int cpu;
+ uint64_t u0, u1, i0, i1;
+ int fd;
+
+ /* find out "nr_cpu_ids" */
+ m = NULL;
+ len = 0;
+ do {
+ len += sizeof(unsigned long);
+ free(m);
+ m = malloc(len);
+ } while (sys_sched_getaffinity(0, len, m) == -EINVAL);
+
+ fd = open("/proc/uptime", O_RDONLY);
+ assert(fd >= 0);
+
+ proc_uptime(fd, &u0, &i0);
+ for (cpu = 0; cpu < len * 8; cpu++) {
+ memset(m, 0, len);
+ m[cpu / (8 * sizeof(unsigned long))] |= 1UL << (cpu % (8 * sizeof(unsigned long)));
+
+ /* CPU might not exist, ignore error */
+ sys_sched_setaffinity(0, len, m);
+
+ proc_uptime(fd, &u1, &i1);
+ assert(u1 >= u0);
+ assert(i1 >= i0);
+ u0 = u1;
+ i0 = i1;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h
new file mode 100644
index 000000000000..0e464b50e9d9
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static unsigned long long xstrtoull(const char *p, char **end)
+{
+ if (*p == '0') {
+ *end = (char *)p + 1;
+ return 0;
+ } else if ('1' <= *p && *p <= '9') {
+ unsigned long long val;
+
+ errno = 0;
+ val = strtoull(p, end, 10);
+ assert(errno == 0);
+ return val;
+ } else
+ assert(0);
+}
+
+static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle)
+{
+ uint64_t val1, val2;
+ char buf[64], *p;
+ ssize_t rv;
+
+ /* save "p < end" checks */
+ memset(buf, 0, sizeof(buf));
+ rv = pread(fd, buf, sizeof(buf), 0);
+ assert(0 <= rv && rv <= sizeof(buf));
+ buf[sizeof(buf) - 1] = '\0';
+
+ p = buf;
+
+ val1 = xstrtoull(p, &p);
+ assert(p[0] == '.');
+ assert('0' <= p[1] && p[1] <= '9');
+ assert('0' <= p[2] && p[2] <= '9');
+ assert(p[3] == ' ');
+
+ val2 = (p[1] - '0') * 10 + p[2] - '0';
+ *uptime = val1 * 100 + val2;
+
+ p += 4;
+
+ val1 = xstrtoull(p, &p);
+ assert(p[0] == '.');
+ assert('0' <= p[1] && p[1] <= '9');
+ assert('0' <= p[2] && p[2] <= '9');
+ assert(p[3] == '\n');
+
+ val2 = (p[1] - '0') * 10 + p[2] - '0';
+ *idle = val1 * 100 + val2;
+
+ assert(p + 4 == buf + rv);
+}
diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c
new file mode 100644
index 000000000000..1e73c2232097
--- /dev/null
+++ b/tools/testing/selftests/proc/read.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test
+// 1) read of every file in /proc
+// 2) readlink of every symlink in /proc
+// 3) recursively (1) + (2) for every directory in /proc
+// 4) write to /proc/*/clear_refs and /proc/*/task/*/clear_refs
+// 5) write to /proc/sysrq-trigger
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static inline bool streq(const char *s1, const char *s2)
+{
+ return strcmp(s1, s2) == 0;
+}
+
+static struct dirent *xreaddir(DIR *d)
+{
+ struct dirent *de;
+
+ errno = 0;
+ de = readdir(d);
+ if (!de && errno != 0) {
+ exit(1);
+ }
+ return de;
+}
+
+static void f_reg(DIR *d, const char *filename)
+{
+ char buf[4096];
+ int fd;
+ ssize_t rv;
+
+ /* read from /proc/kmsg can block */
+ fd = openat(dirfd(d), filename, O_RDONLY|O_NONBLOCK);
+ if (fd == -1)
+ return;
+ rv = read(fd, buf, sizeof(buf));
+ assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);
+ close(fd);
+}
+
+static void f_reg_write(DIR *d, const char *filename, const char *buf, size_t len)
+{
+ int fd;
+ ssize_t rv;
+
+ fd = openat(dirfd(d), filename, O_WRONLY);
+ if (fd == -1)
+ return;
+ rv = write(fd, buf, len);
+ assert((0 <= rv && rv <= len) || rv == -1);
+ close(fd);
+}
+
+static void f_lnk(DIR *d, const char *filename)
+{
+ char buf[4096];
+ ssize_t rv;
+
+ rv = readlinkat(dirfd(d), filename, buf, sizeof(buf));
+ assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);
+}
+
+static void f(DIR *d, unsigned int level)
+{
+ struct dirent *de;
+
+ de = xreaddir(d);
+ assert(de->d_type == DT_DIR);
+ assert(streq(de->d_name, "."));
+
+ de = xreaddir(d);
+ assert(de->d_type == DT_DIR);
+ assert(streq(de->d_name, ".."));
+
+ while ((de = xreaddir(d))) {
+ assert(!streq(de->d_name, "."));
+ assert(!streq(de->d_name, ".."));
+
+ switch (de->d_type) {
+ DIR *dd;
+ int fd;
+
+ case DT_REG:
+ if (level == 0 && streq(de->d_name, "sysrq-trigger")) {
+ f_reg_write(d, de->d_name, "h", 1);
+ } else if (level == 1 && streq(de->d_name, "clear_refs")) {
+ f_reg_write(d, de->d_name, "1", 1);
+ } else if (level == 3 && streq(de->d_name, "clear_refs")) {
+ f_reg_write(d, de->d_name, "1", 1);
+ } else {
+ f_reg(d, de->d_name);
+ }
+ break;
+ case DT_DIR:
+ fd = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY);
+ if (fd == -1)
+ continue;
+ dd = fdopendir(fd);
+ if (!dd)
+ continue;
+ f(dd, level + 1);
+ closedir(dd);
+ break;
+ case DT_LNK:
+ f_lnk(d, de->d_name);
+ break;
+ default:
+ assert(0);
+ }
+ }
+}
+
+int main(void)
+{
+ DIR *d;
+
+ d = opendir("/proc");
+ if (!d)
+ return 2;
+ f(d, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 5df609950a66..168c66d74fc5 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -2860,6 +2860,7 @@ TEST(get_metadata)
int pipefd[2];
char buf;
struct seccomp_metadata md;
+ long ret;
ASSERT_EQ(0, pipe(pipefd));
@@ -2893,16 +2894,26 @@ TEST(get_metadata)
ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
ASSERT_EQ(pid, waitpid(pid, NULL, 0));
+ /* Past here must not use ASSERT or child process is never killed. */
+
md.filter_off = 0;
- ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md));
+ errno = 0;
+ ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
+ EXPECT_EQ(sizeof(md), ret) {
+ if (errno == EINVAL)
+ XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
+ }
+
EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
EXPECT_EQ(md.filter_off, 0);
md.filter_off = 1;
- ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md));
+ ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
+ EXPECT_EQ(sizeof(md), ret);
EXPECT_EQ(md.flags, 0);
EXPECT_EQ(md.filter_off, 1);
+skip:
ASSERT_EQ(0, kill(pid, SIGKILL));
}
diff --git a/tools/testing/selftests/x86/test_syscall_vdso.c b/tools/testing/selftests/x86/test_syscall_vdso.c
index 40370354d4c1..c9c3281077bc 100644
--- a/tools/testing/selftests/x86/test_syscall_vdso.c
+++ b/tools/testing/selftests/x86/test_syscall_vdso.c
@@ -100,12 +100,19 @@ asm (
" shl $32, %r8\n"
" orq $0x7f7f7f7f, %r8\n"
" movq %r8, %r9\n"
- " movq %r8, %r10\n"
- " movq %r8, %r11\n"
- " movq %r8, %r12\n"
- " movq %r8, %r13\n"
- " movq %r8, %r14\n"
- " movq %r8, %r15\n"
+ " incq %r9\n"
+ " movq %r9, %r10\n"
+ " incq %r10\n"
+ " movq %r10, %r11\n"
+ " incq %r11\n"
+ " movq %r11, %r12\n"
+ " incq %r12\n"
+ " movq %r12, %r13\n"
+ " incq %r13\n"
+ " movq %r13, %r14\n"
+ " incq %r14\n"
+ " movq %r14, %r15\n"
+ " incq %r15\n"
" ret\n"
" .code32\n"
" .popsection\n"
@@ -128,12 +135,13 @@ int check_regs64(void)
int err = 0;
int num = 8;
uint64_t *r64 = &regs64.r8;
+ uint64_t expected = 0x7f7f7f7f7f7f7f7fULL;
if (!kernel_is_64bit)
return 0;
do {
- if (*r64 == 0x7f7f7f7f7f7f7f7fULL)
+ if (*r64 == expected++)
continue; /* register did not change */
if (syscall_addr != (long)&int80) {
/*
@@ -147,18 +155,17 @@ int check_regs64(void)
continue;
}
} else {
- /* INT80 syscall entrypoint can be used by
+ /*
+ * INT80 syscall entrypoint can be used by
* 64-bit programs too, unlike SYSCALL/SYSENTER.
* Therefore it must preserve R12+
* (they are callee-saved registers in 64-bit C ABI).
*
- * This was probably historically not intended,
- * but R8..11 are clobbered (cleared to 0).
- * IOW: they are the only registers which aren't
- * preserved across INT80 syscall.
+ * Starting in Linux 4.17 (and any kernel that
+ * backports the change), R8..11 are preserved.
+ * Historically (and probably unintentionally), they
+ * were clobbered or zeroed.
*/
- if (*r64 == 0 && num <= 11)
- continue;
}
printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
err++;
diff --git a/tools/thermal/tmon/sysfs.c b/tools/thermal/tmon/sysfs.c
index 1c12536f2081..18f523557983 100644
--- a/tools/thermal/tmon/sysfs.c
+++ b/tools/thermal/tmon/sysfs.c
@@ -486,6 +486,7 @@ int zone_instance_to_index(int zone_inst)
int update_thermal_data()
{
int i;
+ int next_thermal_record = cur_thermal_record + 1;
char tz_name[256];
static unsigned long samples;
@@ -495,9 +496,9 @@ int update_thermal_data()
}
/* circular buffer for keeping historic data */
- if (cur_thermal_record >= NR_THERMAL_RECORDS)
- cur_thermal_record = 0;
- gettimeofday(&trec[cur_thermal_record].tv, NULL);
+ if (next_thermal_record >= NR_THERMAL_RECORDS)
+ next_thermal_record = 0;
+ gettimeofday(&trec[next_thermal_record].tv, NULL);
if (tmon_log) {
fprintf(tmon_log, "%lu ", ++samples);
fprintf(tmon_log, "%3.1f ", p_param.t_target);
@@ -507,11 +508,12 @@ int update_thermal_data()
snprintf(tz_name, 256, "%s/%s%d", THERMAL_SYSFS, TZONE,
ptdata.tzi[i].instance);
sysfs_get_ulong(tz_name, "temp",
- &trec[cur_thermal_record].temp[i]);
+ &trec[next_thermal_record].temp[i]);
if (tmon_log)
fprintf(tmon_log, "%lu ",
- trec[cur_thermal_record].temp[i]/1000);
+ trec[next_thermal_record].temp[i] / 1000);
}
+ cur_thermal_record = next_thermal_record;
for (i = 0; i < ptdata.nr_cooling_dev; i++) {
char cdev_name[256];
unsigned long val;
diff --git a/tools/thermal/tmon/tmon.c b/tools/thermal/tmon/tmon.c
index 9aa19652e8e8..b43138f8b862 100644
--- a/tools/thermal/tmon/tmon.c
+++ b/tools/thermal/tmon/tmon.c
@@ -336,7 +336,6 @@ int main(int argc, char **argv)
show_data_w();
show_cooling_device();
}
- cur_thermal_record++;
time_elapsed += ticktime;
controller_handler(trec[0].temp[target_tz_index] / 1000,
&yk);
diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
index 477899c12c51..2d566fbd236b 100644
--- a/tools/virtio/ringtest/ptr_ring.c
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -17,6 +17,8 @@
#define likely(x) (__builtin_expect(!!(x), 1))
#define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a))
#define SIZE_MAX (~(size_t)0)
+#define KMALLOC_MAX_SIZE SIZE_MAX
+#define BUG_ON(x) assert(x)
typedef pthread_spinlock_t spinlock_t;
@@ -57,6 +59,9 @@ static void kfree(void *p)
free(p);
}
+#define kvmalloc_array kmalloc_array
+#define kvfree kfree
+
static void spin_lock_init(spinlock_t *lock)
{
int r = pthread_spin_init(lock, 0);
OpenPOWER on IntegriCloud