diff options
Diffstat (limited to 'tools/testing')
49 files changed, 1843 insertions, 193 deletions
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index cb166be4918d..4ea385be528f 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -138,6 +138,7 @@ static u32 handle[] = { }; static unsigned long dimm_fail_cmd_flags[NUM_DCR]; +static int dimm_fail_cmd_code[NUM_DCR]; struct nfit_test_fw { enum intel_fw_update_state state; @@ -892,8 +893,11 @@ static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) if (i >= ARRAY_SIZE(handle)) return -ENXIO; - if ((1 << func) & dimm_fail_cmd_flags[i]) + if ((1 << func) & dimm_fail_cmd_flags[i]) { + if (dimm_fail_cmd_code[i]) + return dimm_fail_cmd_code[i]; return -EIO; + } return i; } @@ -1162,12 +1166,12 @@ static int ars_state_init(struct device *dev, struct ars_state *ars_state) static void put_dimms(void *data) { - struct device **dimm_dev = data; + struct nfit_test *t = data; int i; - for (i = 0; i < NUM_DCR; i++) - if (dimm_dev[i]) - device_unregister(dimm_dev[i]); + for (i = 0; i < t->num_dcr; i++) + if (t->dimm_dev[i]) + device_unregister(t->dimm_dev[i]); } static struct class *nfit_test_dimm; @@ -1176,13 +1180,11 @@ static int dimm_name_to_id(struct device *dev) { int dimm; - if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1 - || dimm >= NUM_DCR || dimm < 0) + if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1) return -ENXIO; return dimm; } - static ssize_t handle_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1191,7 +1193,7 @@ static ssize_t handle_show(struct device *dev, struct device_attribute *attr, if (dimm < 0) return dimm; - return sprintf(buf, "%#x", handle[dimm]); + return sprintf(buf, "%#x\n", handle[dimm]); } DEVICE_ATTR_RO(handle); @@ -1225,8 +1227,39 @@ static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RW(fail_cmd); +static ssize_t fail_cmd_code_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int dimm = dimm_name_to_id(dev); + + if (dimm < 0) + return dimm; + + return sprintf(buf, "%d\n", dimm_fail_cmd_code[dimm]); +} + +static ssize_t fail_cmd_code_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + int dimm = dimm_name_to_id(dev); + unsigned long val; + ssize_t rc; + + if (dimm < 0) + return dimm; + + rc = kstrtol(buf, 0, &val); + if (rc) + return rc; + + dimm_fail_cmd_code[dimm] = val; + return size; +} +static DEVICE_ATTR_RW(fail_cmd_code); + static struct attribute *nfit_test_dimm_attributes[] = { &dev_attr_fail_cmd.attr, + &dev_attr_fail_cmd_code.attr, &dev_attr_handle.attr, NULL, }; @@ -1240,6 +1273,23 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = { NULL, }; +static int nfit_test_dimm_init(struct nfit_test *t) +{ + int i; + + if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t)) + return -ENOMEM; + for (i = 0; i < t->num_dcr; i++) { + t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm, + &t->pdev.dev, 0, NULL, + nfit_test_dimm_attribute_groups, + "test_dimm%d", i + t->dcr_idx); + if (!t->dimm_dev[i]) + return -ENOMEM; + } + return 0; +} + static void smart_init(struct nfit_test *t) { int i; @@ -1335,17 +1385,8 @@ static int nfit_test0_alloc(struct nfit_test *t) if (!t->_fit) return -ENOMEM; - if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t->dimm_dev)) + if (nfit_test_dimm_init(t)) return -ENOMEM; - for (i = 0; i < NUM_DCR; i++) { - t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm, - &t->pdev.dev, 0, NULL, - nfit_test_dimm_attribute_groups, - "test_dimm%d", i); - if (!t->dimm_dev[i]) - return -ENOMEM; - } - smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -1377,6 +1418,8 @@ static int nfit_test1_alloc(struct nfit_test *t) if (!t->spa_set[1]) return -ENOMEM; + if (nfit_test_dimm_init(t)) + return -ENOMEM; smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -2222,6 +2265,9 @@ static void nfit_test1_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index fa7ee369b3c9..db66f8a0d4be 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -17,7 +17,7 @@ ifeq ($(BUILD), 32) LDFLAGS += -m32 endif -targets: mapshift $(TARGETS) +targets: generated/map-shift.h $(TARGETS) main: $(OFILES) @@ -42,9 +42,7 @@ radix-tree.c: ../../../lib/radix-tree.c idr.c: ../../../lib/idr.c sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ -.PHONY: mapshift - -mapshift: +generated/map-shift.h: @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \ generated/map-shift.h; \ diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 6c645eb77d42..ee820fcc29b0 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -252,6 +252,13 @@ void idr_checks(void) idr_remove(&idr, 3); idr_remove(&idr, 0); + assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == 0); + idr_remove(&idr, 1); + for (i = 1; i < RADIX_TREE_MAP_SIZE; i++) + assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == i); + idr_remove(&idr, 1 << 30); + idr_destroy(&idr); + for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) { struct item *item = item_create(i, 0); assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i); diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 59245b3d587c..7bf405638b0b 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -16,6 +16,7 @@ #include <linux/radix-tree.h> #include <linux/slab.h> #include <linux/errno.h> +#include <pthread.h> #include "test.h" @@ -624,6 +625,67 @@ static void multiorder_account(void) item_kill_tree(&tree); } +bool stop_iteration = false; + +static void *creator_func(void *ptr) +{ + /* 'order' is set up to ensure we have sibling entries */ + unsigned int order = RADIX_TREE_MAP_SHIFT - 1; + struct radix_tree_root *tree = ptr; + int i; + + for (i = 0; i < 10000; i++) { + item_insert_order(tree, 0, order); + item_delete_rcu(tree, 0); + } + + stop_iteration = true; + return NULL; +} + +static void *iterator_func(void *ptr) +{ + struct radix_tree_root *tree = ptr; + struct radix_tree_iter iter; + struct item *item; + void **slot; + + while (!stop_iteration) { + rcu_read_lock(); + radix_tree_for_each_slot(slot, tree, &iter, 0) { + item = radix_tree_deref_slot(slot); + + if (!item) + continue; + if (radix_tree_deref_retry(item)) { + slot = radix_tree_iter_retry(&iter); + continue; + } + + item_sanity(item, iter.index); + } + rcu_read_unlock(); + } + return NULL; +} + +static void multiorder_iteration_race(void) +{ + const int num_threads = sysconf(_SC_NPROCESSORS_ONLN); + pthread_t worker_thread[num_threads]; + RADIX_TREE(tree, GFP_KERNEL); + int i; + + pthread_create(&worker_thread[0], NULL, &creator_func, &tree); + for (i = 1; i < num_threads; i++) + pthread_create(&worker_thread[i], NULL, &iterator_func, &tree); + + for (i = 0; i < num_threads; i++) + pthread_join(worker_thread[i], NULL); + + item_kill_tree(&tree); +} + void multiorder_checks(void) { int i; @@ -644,6 +706,7 @@ void multiorder_checks(void) multiorder_join(); multiorder_split(); multiorder_account(); + multiorder_iteration_race(); radix_tree_cpu_dead(0); } diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 5978ab1f403d..def6015570b2 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -75,6 +75,25 @@ int item_delete(struct radix_tree_root *root, unsigned long index) return 0; } +static void item_free_rcu(struct rcu_head *head) +{ + struct item *item = container_of(head, struct item, rcu_head); + + free(item); +} + +int item_delete_rcu(struct radix_tree_root *root, unsigned long index) +{ + struct item *item = radix_tree_delete(root, index); + + if (item) { + item_sanity(item, index); + call_rcu(&item->rcu_head, item_free_rcu); + return 1; + } + return 0; +} + void item_check_present(struct radix_tree_root *root, unsigned long index) { struct item *item; diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index d9c031dbeb1a..31f1d9b6f506 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -5,6 +5,7 @@ #include <linux/rcupdate.h> struct item { + struct rcu_head rcu_head; unsigned long index; unsigned int order; }; @@ -12,9 +13,11 @@ struct item { struct item *item_create(unsigned long index, unsigned int order); int __item_insert(struct radix_tree_root *root, struct item *item); int item_insert(struct radix_tree_root *root, unsigned long index); +void item_sanity(struct item *item, unsigned long index); int item_insert_order(struct radix_tree_root *root, unsigned long index, unsigned order); int item_delete(struct radix_tree_root *root, unsigned long index); +int item_delete_rcu(struct radix_tree_root *root, unsigned long index); struct item *item_lookup(struct radix_tree_root *root, unsigned long index); void item_check_present(struct radix_tree_root *root, unsigned long index); diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 9cf83f895d98..5e1ab2f0eb79 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -12,3 +12,6 @@ test_tcpbpf_user test_verifier_log feature test_libbpf_open +test_sock +test_sock_addr +urandom_read diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 983dd25d49f4..1eefe211a4a8 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -5,3 +5,5 @@ CONFIG_BPF_EVENTS=y CONFIG_TEST_BPF=m CONFIG_CGROUP_BPF=y CONFIG_NETDEVSIM=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_SCH_INGRESS=y diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index faadbe233966..4123d0ab90ba 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1108,7 +1108,7 @@ static void test_stacktrace_build_id(void) assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0); - assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0); + assert(system("./urandom_read") == 0); /* disable stack trace collection */ key = 0; val = 1; @@ -1158,7 +1158,7 @@ static void test_stacktrace_build_id(void) } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); CHECK(build_id_matches < 1, "build id match", - "Didn't find expected build ID from the map"); + "Didn't find expected build ID from the map\n"); disable_pmu: ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index 73bb20cfb9b7..f4d99fabc56d 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -13,6 +13,7 @@ #include <bpf/bpf.h> #include "cgroup_helpers.h" +#include "bpf_rlimit.h" #ifndef ARRAY_SIZE # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index d488f20926e8..2950f80ba7fb 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -15,6 +15,7 @@ #include <bpf/libbpf.h> #include "cgroup_helpers.h" +#include "bpf_rlimit.h" #define CG_PATH "/foo" #define CONNECT4_PROG_PATH "./connect4_prog.o" diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh index c6e1dcf992c4..9832a875a828 100755 --- a/tools/testing/selftests/bpf/test_sock_addr.sh +++ b/tools/testing/selftests/bpf/test_sock_addr.sh @@ -4,7 +4,7 @@ set -eu ping_once() { - ping -q -c 1 -W 1 ${1%%/*} >/dev/null 2>&1 + ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1 } wait_for_ip() @@ -13,7 +13,7 @@ wait_for_ip() echo -n "Wait for testing IPv4/IPv6 to become available " for _i in $(seq ${MAX_PING_TRIES}); do echo -n "." - if ping_once ${TEST_IPv4} && ping_once ${TEST_IPv6}; then + if ping_once 4 ${TEST_IPv4} && ping_once 6 ${TEST_IPv6}; then echo " OK" return fi diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3e7718b1a9ae..fd7de7eb329e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -11713,6 +11713,11 @@ static void get_unpriv_disabled() FILE *fd; fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r"); + if (!fd) { + perror("fopen /proc/sys/"UNPRIV_SYSCTL); + unpriv_disabled = true; + return; + } if (fgets(buf, 2, fd) == buf && atoi(buf)) unpriv_disabled = true; fclose(fd); diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index 4e6d09fb166f..5c7d7001ad37 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,8 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := dnotify_test devpts_pts -all: $(TEST_PROGS) -include ../lib.mk +TEST_GEN_PROGS := devpts_pts +TEST_GEN_PROGS_EXTENDED := dnotify_test -clean: - rm -fr $(TEST_PROGS) +include ../lib.mk diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile index 826f38d5dd19..261c81f08606 100644 --- a/tools/testing/selftests/firmware/Makefile +++ b/tools/testing/selftests/firmware/Makefile @@ -4,6 +4,7 @@ all: TEST_PROGS := fw_run_tests.sh +TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_lib.sh include ../lib.mk diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh index 9ea31b57d71a..962d7f4ac627 100755 --- a/tools/testing/selftests/firmware/fw_lib.sh +++ b/tools/testing/selftests/firmware/fw_lib.sh @@ -154,11 +154,13 @@ test_finish() if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout fi - if [ "$OLD_FWPATH" = "" ]; then - OLD_FWPATH=" " - fi if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then - echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path + if [ "$OLD_FWPATH" = "" ]; then + # A zero-length write won't work; write a null byte + printf '\000' >/sys/module/firmware_class/parameters/path + else + echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path + fi fi if [ -f $FW ]; then rm -f "$FW" diff --git a/tools/testing/selftests/firmware/fw_run_tests.sh b/tools/testing/selftests/firmware/fw_run_tests.sh index 06d638e9dc62..cffdd4eb0a57 100755 --- a/tools/testing/selftests/firmware/fw_run_tests.sh +++ b/tools/testing/selftests/firmware/fw_run_tests.sh @@ -66,5 +66,5 @@ if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then run_test_config_0003 else echo "Running basic kernel configuration, working with your config" - run_test + run_tests fi diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc index 786dce7e48be..2aabab363cfb 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc @@ -29,7 +29,7 @@ do_reset echo "Test extended error support" echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger -echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger &>/dev/null +! echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then fail "Failed to generate extended error in histogram" fi diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc new file mode 100644 index 000000000000..c193dce611a2 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc @@ -0,0 +1,44 @@ +#!/bin/sh +# description: event trigger - test multiple actions on hist trigger + + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test multiple actions on hist trigger" +echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events +TRIGGER1=events/sched/sched_wakeup/trigger +TRIGGER2=events/sched/sched_switch/trigger + +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' > $TRIGGER1 +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0 if next_comm=="cyclictest"' >> $TRIGGER2 +echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid) if next_comm=="cyclictest"' >> $TRIGGER2 +echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid) if next_comm=="cyclictest"' >> $TRIGGER2 +echo 'hist:keys=next_pid if next_comm=="cyclictest"' >> $TRIGGER2 + +do_reset + +exit 0 diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index dc44de904797..d9d00319b07c 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -4,17 +4,18 @@ top_srcdir = ../../../../ UNAME_M := $(shell uname -m) LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c -LIBKVM_x86_64 = lib/x86.c +LIBKVM_x86_64 = lib/x86.c lib/vmx.c TEST_GEN_PROGS_x86_64 = set_sregs_test TEST_GEN_PROGS_x86_64 += sync_regs_test +TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ -CFLAGS += -O2 -g -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) +CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I.. # After inclusion, $(OUTPUT) is defined and # $(TEST_GEN_PROGS) starts with $(OUTPUT)/ diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 57974ad46373..637b7017b6ee 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -112,24 +112,27 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, uint32_t memslot); -void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid); +struct kvm_cpuid2 *kvm_get_supported_cpuid(void); void vcpu_set_cpuid( struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); -struct kvm_cpuid2 *allocate_kvm_cpuid2(void); struct kvm_cpuid_entry2 * -find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, - uint32_t index); +kvm_get_supported_cpuid_index(uint32_t function, uint32_t index); static inline struct kvm_cpuid_entry2 * -find_cpuid_entry(struct kvm_cpuid2 *cpuid, uint32_t function) +kvm_get_supported_cpuid_entry(uint32_t function) { - return find_cpuid_index_entry(cpuid, function, 0); + return kvm_get_supported_cpuid_index(function, 0); } struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code); void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); +typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr, + vm_paddr_t vmxon_paddr, + vm_vaddr_t vmcs_vaddr, + vm_paddr_t vmcs_paddr); + struct kvm_userspace_memory_region * kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 7ab98e41324f..ac53730b30aa 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -19,6 +19,7 @@ #include <errno.h> #include <unistd.h> #include <fcntl.h> +#include "kselftest.h" ssize_t test_write(int fd, const void *buf, size_t count); ssize_t test_read(int fd, void *buf, size_t count); diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/vmx.h new file mode 100644 index 000000000000..6ed8499807fd --- /dev/null +++ b/tools/testing/selftests/kvm/include/vmx.h @@ -0,0 +1,494 @@ +/* + * tools/testing/selftests/kvm/include/vmx.h + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + */ + +#ifndef SELFTEST_KVM_VMX_H +#define SELFTEST_KVM_VMX_H + +#include <stdint.h> +#include "x86.h" + +#define CPUID_VMX_BIT 5 + +#define CPUID_VMX (1 << 5) + +/* + * Definitions of Primary Processor-Based VM-Execution Controls. + */ +#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 +#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 +#define CPU_BASED_HLT_EXITING 0x00000080 +#define CPU_BASED_INVLPG_EXITING 0x00000200 +#define CPU_BASED_MWAIT_EXITING 0x00000400 +#define CPU_BASED_RDPMC_EXITING 0x00000800 +#define CPU_BASED_RDTSC_EXITING 0x00001000 +#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 +#define CPU_BASED_CR3_STORE_EXITING 0x00010000 +#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 +#define CPU_BASED_CR8_STORE_EXITING 0x00100000 +#define CPU_BASED_TPR_SHADOW 0x00200000 +#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 +#define CPU_BASED_MOV_DR_EXITING 0x00800000 +#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 +#define CPU_BASED_USE_IO_BITMAPS 0x02000000 +#define CPU_BASED_MONITOR_TRAP 0x08000000 +#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 +#define CPU_BASED_MONITOR_EXITING 0x20000000 +#define CPU_BASED_PAUSE_EXITING 0x40000000 +#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 + +#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 + +/* + * Definitions of Secondary Processor-Based VM-Execution Controls. + */ +#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 +#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 +#define SECONDARY_EXEC_DESC 0x00000004 +#define SECONDARY_EXEC_RDTSCP 0x00000008 +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 +#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 +#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 +#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 +#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 +#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 +#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800 +#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 +#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 +#define SECONDARY_EXEC_ENABLE_PML 0x00020000 +#define SECONDARY_EPT_VE 0x00040000 +#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000 +#define SECONDARY_EXEC_TSC_SCALING 0x02000000 + +#define PIN_BASED_EXT_INTR_MASK 0x00000001 +#define PIN_BASED_NMI_EXITING 0x00000008 +#define PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 +#define PIN_BASED_POSTED_INTR 0x00000080 + +#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 + +#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 +#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 +#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 +#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 +#define VM_EXIT_SAVE_IA32_PAT 0x00040000 +#define VM_EXIT_LOAD_IA32_PAT 0x00080000 +#define VM_EXIT_SAVE_IA32_EFER 0x00100000 +#define VM_EXIT_LOAD_IA32_EFER 0x00200000 +#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 + +#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff + +#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 +#define VM_ENTRY_IA32E_MODE 0x00000200 +#define VM_ENTRY_SMM 0x00000400 +#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 +#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 +#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 +#define VM_ENTRY_LOAD_IA32_EFER 0x00008000 + +#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff + +#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f +#define VMX_MISC_SAVE_EFER_LMA 0x00000020 + +#define EXIT_REASON_FAILED_VMENTRY 0x80000000 +#define EXIT_REASON_EXCEPTION_NMI 0 +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 +#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_NMI_WINDOW 8 +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_INVALID_STATE 33 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_INSTRUCTION 39 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 +#define EXIT_REASON_MCE_DURING_VMENTRY 41 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EOI_INDUCED 45 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_INVEPT 50 +#define EXIT_REASON_RDTSCP 51 +#define EXIT_REASON_PREEMPTION_TIMER 52 +#define EXIT_REASON_INVVPID 53 +#define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 +#define EXIT_REASON_APIC_WRITE 56 +#define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_PML_FULL 62 +#define EXIT_REASON_XSAVES 63 +#define EXIT_REASON_XRSTORS 64 +#define LAST_EXIT_REASON 64 + +enum vmcs_field { + VIRTUAL_PROCESSOR_ID = 0x00000000, + POSTED_INTR_NV = 0x00000002, + GUEST_ES_SELECTOR = 0x00000800, + GUEST_CS_SELECTOR = 0x00000802, + GUEST_SS_SELECTOR = 0x00000804, + GUEST_DS_SELECTOR = 0x00000806, + GUEST_FS_SELECTOR = 0x00000808, + GUEST_GS_SELECTOR = 0x0000080a, + GUEST_LDTR_SELECTOR = 0x0000080c, + GUEST_TR_SELECTOR = 0x0000080e, + GUEST_INTR_STATUS = 0x00000810, + GUEST_PML_INDEX = 0x00000812, + HOST_ES_SELECTOR = 0x00000c00, + HOST_CS_SELECTOR = 0x00000c02, + HOST_SS_SELECTOR = 0x00000c04, + HOST_DS_SELECTOR = 0x00000c06, + HOST_FS_SELECTOR = 0x00000c08, + HOST_GS_SELECTOR = 0x00000c0a, + HOST_TR_SELECTOR = 0x00000c0c, + IO_BITMAP_A = 0x00002000, + IO_BITMAP_A_HIGH = 0x00002001, + IO_BITMAP_B = 0x00002002, + IO_BITMAP_B_HIGH = 0x00002003, + MSR_BITMAP = 0x00002004, + MSR_BITMAP_HIGH = 0x00002005, + VM_EXIT_MSR_STORE_ADDR = 0x00002006, + VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007, + VM_EXIT_MSR_LOAD_ADDR = 0x00002008, + VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, + VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, + VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, + PML_ADDRESS = 0x0000200e, + PML_ADDRESS_HIGH = 0x0000200f, + TSC_OFFSET = 0x00002010, + TSC_OFFSET_HIGH = 0x00002011, + VIRTUAL_APIC_PAGE_ADDR = 0x00002012, + VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, + APIC_ACCESS_ADDR = 0x00002014, + APIC_ACCESS_ADDR_HIGH = 0x00002015, + POSTED_INTR_DESC_ADDR = 0x00002016, + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, + EPT_POINTER = 0x0000201a, + EPT_POINTER_HIGH = 0x0000201b, + EOI_EXIT_BITMAP0 = 0x0000201c, + EOI_EXIT_BITMAP0_HIGH = 0x0000201d, + EOI_EXIT_BITMAP1 = 0x0000201e, + EOI_EXIT_BITMAP1_HIGH = 0x0000201f, + EOI_EXIT_BITMAP2 = 0x00002020, + EOI_EXIT_BITMAP2_HIGH = 0x00002021, + EOI_EXIT_BITMAP3 = 0x00002022, + EOI_EXIT_BITMAP3_HIGH = 0x00002023, + VMREAD_BITMAP = 0x00002026, + VMREAD_BITMAP_HIGH = 0x00002027, + VMWRITE_BITMAP = 0x00002028, + VMWRITE_BITMAP_HIGH = 0x00002029, + XSS_EXIT_BITMAP = 0x0000202C, + XSS_EXIT_BITMAP_HIGH = 0x0000202D, + TSC_MULTIPLIER = 0x00002032, + TSC_MULTIPLIER_HIGH = 0x00002033, + GUEST_PHYSICAL_ADDRESS = 0x00002400, + GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, + VMCS_LINK_POINTER = 0x00002800, + VMCS_LINK_POINTER_HIGH = 0x00002801, + GUEST_IA32_DEBUGCTL = 0x00002802, + GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, + GUEST_IA32_PAT = 0x00002804, + GUEST_IA32_PAT_HIGH = 0x00002805, + GUEST_IA32_EFER = 0x00002806, + GUEST_IA32_EFER_HIGH = 0x00002807, + GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, + GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809, + GUEST_PDPTR0 = 0x0000280a, + GUEST_PDPTR0_HIGH = 0x0000280b, + GUEST_PDPTR1 = 0x0000280c, + GUEST_PDPTR1_HIGH = 0x0000280d, + GUEST_PDPTR2 = 0x0000280e, + GUEST_PDPTR2_HIGH = 0x0000280f, + GUEST_PDPTR3 = 0x00002810, + GUEST_PDPTR3_HIGH = 0x00002811, + GUEST_BNDCFGS = 0x00002812, + GUEST_BNDCFGS_HIGH = 0x00002813, + HOST_IA32_PAT = 0x00002c00, + HOST_IA32_PAT_HIGH = 0x00002c01, + HOST_IA32_EFER = 0x00002c02, + HOST_IA32_EFER_HIGH = 0x00002c03, + HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, + HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05, + PIN_BASED_VM_EXEC_CONTROL = 0x00004000, + CPU_BASED_VM_EXEC_CONTROL = 0x00004002, + EXCEPTION_BITMAP = 0x00004004, + PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, + PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, + CR3_TARGET_COUNT = 0x0000400a, + VM_EXIT_CONTROLS = 0x0000400c, + VM_EXIT_MSR_STORE_COUNT = 0x0000400e, + VM_EXIT_MSR_LOAD_COUNT = 0x00004010, + VM_ENTRY_CONTROLS = 0x00004012, + VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, + VM_ENTRY_INTR_INFO_FIELD = 0x00004016, + VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, + VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, + TPR_THRESHOLD = 0x0000401c, + SECONDARY_VM_EXEC_CONTROL = 0x0000401e, + PLE_GAP = 0x00004020, + PLE_WINDOW = 0x00004022, + VM_INSTRUCTION_ERROR = 0x00004400, + VM_EXIT_REASON = 0x00004402, + VM_EXIT_INTR_INFO = 0x00004404, + VM_EXIT_INTR_ERROR_CODE = 0x00004406, + IDT_VECTORING_INFO_FIELD = 0x00004408, + IDT_VECTORING_ERROR_CODE = 0x0000440a, + VM_EXIT_INSTRUCTION_LEN = 0x0000440c, + VMX_INSTRUCTION_INFO = 0x0000440e, + GUEST_ES_LIMIT = 0x00004800, + GUEST_CS_LIMIT = 0x00004802, + GUEST_SS_LIMIT = 0x00004804, + GUEST_DS_LIMIT = 0x00004806, + GUEST_FS_LIMIT = 0x00004808, + GUEST_GS_LIMIT = 0x0000480a, + GUEST_LDTR_LIMIT = 0x0000480c, + GUEST_TR_LIMIT = 0x0000480e, + GUEST_GDTR_LIMIT = 0x00004810, + GUEST_IDTR_LIMIT = 0x00004812, + GUEST_ES_AR_BYTES = 0x00004814, + GUEST_CS_AR_BYTES = 0x00004816, + GUEST_SS_AR_BYTES = 0x00004818, + GUEST_DS_AR_BYTES = 0x0000481a, + GUEST_FS_AR_BYTES = 0x0000481c, + GUEST_GS_AR_BYTES = 0x0000481e, + GUEST_LDTR_AR_BYTES = 0x00004820, + GUEST_TR_AR_BYTES = 0x00004822, + GUEST_INTERRUPTIBILITY_INFO = 0x00004824, + GUEST_ACTIVITY_STATE = 0X00004826, + GUEST_SYSENTER_CS = 0x0000482A, + VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, + HOST_IA32_SYSENTER_CS = 0x00004c00, + CR0_GUEST_HOST_MASK = 0x00006000, + CR4_GUEST_HOST_MASK = 0x00006002, + CR0_READ_SHADOW = 0x00006004, + CR4_READ_SHADOW = 0x00006006, + CR3_TARGET_VALUE0 = 0x00006008, + CR3_TARGET_VALUE1 = 0x0000600a, + CR3_TARGET_VALUE2 = 0x0000600c, + CR3_TARGET_VALUE3 = 0x0000600e, + EXIT_QUALIFICATION = 0x00006400, + GUEST_LINEAR_ADDRESS = 0x0000640a, + GUEST_CR0 = 0x00006800, + GUEST_CR3 = 0x00006802, + GUEST_CR4 = 0x00006804, + GUEST_ES_BASE = 0x00006806, + GUEST_CS_BASE = 0x00006808, + GUEST_SS_BASE = 0x0000680a, + GUEST_DS_BASE = 0x0000680c, + GUEST_FS_BASE = 0x0000680e, + GUEST_GS_BASE = 0x00006810, + GUEST_LDTR_BASE = 0x00006812, + GUEST_TR_BASE = 0x00006814, + GUEST_GDTR_BASE = 0x00006816, + GUEST_IDTR_BASE = 0x00006818, + GUEST_DR7 = 0x0000681a, + GUEST_RSP = 0x0000681c, + GUEST_RIP = 0x0000681e, + GUEST_RFLAGS = 0x00006820, + GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, + GUEST_SYSENTER_ESP = 0x00006824, + GUEST_SYSENTER_EIP = 0x00006826, + HOST_CR0 = 0x00006c00, + HOST_CR3 = 0x00006c02, + HOST_CR4 = 0x00006c04, + HOST_FS_BASE = 0x00006c06, + HOST_GS_BASE = 0x00006c08, + HOST_TR_BASE = 0x00006c0a, + HOST_GDTR_BASE = 0x00006c0c, + HOST_IDTR_BASE = 0x00006c0e, + HOST_IA32_SYSENTER_ESP = 0x00006c10, + HOST_IA32_SYSENTER_EIP = 0x00006c12, + HOST_RSP = 0x00006c14, + HOST_RIP = 0x00006c16, +}; + +struct vmx_msr_entry { + uint32_t index; + uint32_t reserved; + uint64_t value; +} __attribute__ ((aligned(16))); + +static inline int vmxon(uint64_t phys) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(phys) + : "cc", "memory"); + + return ret; +} + +static inline void vmxoff(void) +{ + __asm__ __volatile__("vmxoff"); +} + +static inline int vmclear(uint64_t vmcs_pa) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(vmcs_pa) + : "cc", "memory"); + + return ret; +} + +static inline int vmptrld(uint64_t vmcs_pa) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(vmcs_pa) + : "cc", "memory"); + + return ret; +} + +/* + * No guest state (e.g. GPRs) is established by this vmlaunch. + */ +static inline int vmlaunch(void) +{ + int ret; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "vmwrite %%rsp, %[host_rsp];" + "lea 1f(%%rip), %%rax;" + "vmwrite %%rax, %[host_rip];" + "vmlaunch;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r"((uint64_t)HOST_RSP), + [host_rip]"r"((uint64_t)HOST_RIP) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +/* + * No guest state (e.g. GPRs) is established by this vmresume. + */ +static inline int vmresume(void) +{ + int ret; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "vmwrite %%rsp, %[host_rsp];" + "lea 1f(%%rip), %%rax;" + "vmwrite %%rax, %[host_rip];" + "vmresume;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r"((uint64_t)HOST_RSP), + [host_rip]"r"((uint64_t)HOST_RIP) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +static inline int vmread(uint64_t encoding, uint64_t *value) +{ + uint64_t tmp; + uint8_t ret; + + __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]" + : [value]"=rm"(tmp), [ret]"=rm"(ret) + : [encoding]"r"(encoding) + : "cc", "memory"); + + *value = tmp; + return ret; +} + +/* + * A wrapper around vmread that ignores errors and returns zero if the + * vmread instruction fails. + */ +static inline uint64_t vmreadz(uint64_t encoding) +{ + uint64_t value = 0; + vmread(encoding, &value); + return value; +} + +static inline int vmwrite(uint64_t encoding, uint64_t value) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]" + : [ret]"=rm"(ret) + : [value]"rm"(value), [encoding]"r"(encoding) + : "cc", "memory"); + + return ret; +} + +static inline uint32_t vmcs_revision(void) +{ + return rdmsr(MSR_IA32_VMX_BASIC); +} + +void prepare_for_vmx_operation(void); +void prepare_vmcs(void *guest_rip, void *guest_rsp); +struct kvm_vm *vm_create_default_vmx(uint32_t vcpuid, + vmx_guest_code_t guest_code); + +#endif /* !SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 7ca1bb40c498..37e2a787d2fc 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -50,8 +50,8 @@ int kvm_check_cap(long cap) int kvm_fd; kvm_fd = open(KVM_DEV_PATH, O_RDONLY); - TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", - KVM_DEV_PATH, kvm_fd, errno); + if (kvm_fd < 0) + exit(KSFT_SKIP); ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap); TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n" @@ -91,8 +91,8 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) vm->mode = mode; kvm_fd = open(KVM_DEV_PATH, perm); - TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", - KVM_DEV_PATH, kvm_fd, errno); + if (kvm_fd < 0) + exit(KSFT_SKIP); /* Create VM. */ vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL); @@ -378,7 +378,7 @@ int kvm_memcmp_hva_gva(void *hva, * complicated. This function uses a reasonable default length for * the array and performs the appropriate allocation. */ -struct kvm_cpuid2 *allocate_kvm_cpuid2(void) +static struct kvm_cpuid2 *allocate_kvm_cpuid2(void) { struct kvm_cpuid2 *cpuid; int nent = 100; @@ -402,26 +402,31 @@ struct kvm_cpuid2 *allocate_kvm_cpuid2(void) * Input Args: None * * Output Args: - * cpuid - The supported KVM CPUID * - * Return: void + * Return: The supported KVM CPUID * * Get the guest CPUID supported by KVM. */ -void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) +struct kvm_cpuid2 *kvm_get_supported_cpuid(void) { + static struct kvm_cpuid2 *cpuid; int ret; int kvm_fd; + if (cpuid) + return cpuid; + + cpuid = allocate_kvm_cpuid2(); kvm_fd = open(KVM_DEV_PATH, O_RDONLY); - TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", - KVM_DEV_PATH, kvm_fd, errno); + if (kvm_fd < 0) + exit(KSFT_SKIP); ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n", ret, errno); close(kvm_fd); + return cpuid; } /* Locate a cpuid entry. @@ -435,12 +440,13 @@ void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) * Return: A pointer to the cpuid entry. Never returns NULL. */ struct kvm_cpuid_entry2 * -find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, - uint32_t index) +kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) { + struct kvm_cpuid2 *cpuid; struct kvm_cpuid_entry2 *entry = NULL; int i; + cpuid = kvm_get_supported_cpuid(); for (i = 0; i < cpuid->nent; i++) { if (cpuid->entries[i].function == function && cpuid->entries[i].index == index) { @@ -669,8 +675,8 @@ static int vcpu_mmap_sz(void) int dev_fd, ret; dev_fd = open(KVM_DEV_PATH, O_RDONLY); - TEST_ASSERT(dev_fd >= 0, "%s open %s failed, rc: %i errno: %i", - __func__, KVM_DEV_PATH, dev_fd, errno); + if (dev_fd < 0) + exit(KSFT_SKIP); ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); TEST_ASSERT(ret >= sizeof(struct kvm_run), @@ -1435,7 +1441,7 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, sparsebit_idx_t pg; TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " - "not divisable by page size.\n" + "not divisible by page size.\n" " paddr_min: 0x%lx page_size: 0x%x", paddr_min, vm->page_size); diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index 0c5cf3e0cb6f..b132bc95d183 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -121,7 +121,7 @@ * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. * - * + Node starting index is evenly divisable by the number of bits + * + Node starting index is evenly divisible by the number of bits * within a nodes mask member. * * + Nodes never represent a range of bits that wrap around the @@ -1741,7 +1741,7 @@ void sparsebit_validate_internal(struct sparsebit *s) /* Validate node index is divisible by the mask size */ if (nodep->idx % MASK_BITS) { - fprintf(stderr, "Node index not divisable by " + fprintf(stderr, "Node index not divisible by " "mask size,\n" " nodep: %p nodep->idx: 0x%lx " "MASK_BITS: %lu\n", diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/vmx.c new file mode 100644 index 000000000000..0231bc0aae7b --- /dev/null +++ b/tools/testing/selftests/kvm/lib/vmx.c @@ -0,0 +1,243 @@ +/* + * tools/testing/selftests/kvm/lib/x86.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include "test_util.h" +#include "kvm_util.h" +#include "x86.h" +#include "vmx.h" + +/* Create a default VM for VMX tests. + * + * Input Args: + * vcpuid - The id of the single VCPU to add to the VM. + * guest_code - The vCPU's entry point + * + * Output Args: None + * + * Return: + * Pointer to opaque structure that describes the created VM. + */ +struct kvm_vm * +vm_create_default_vmx(uint32_t vcpuid, vmx_guest_code_t guest_code) +{ + struct kvm_cpuid2 *cpuid; + struct kvm_vm *vm; + vm_vaddr_t vmxon_vaddr; + vm_paddr_t vmxon_paddr; + vm_vaddr_t vmcs_vaddr; + vm_paddr_t vmcs_paddr; + + vm = vm_create_default(vcpuid, (void *) guest_code); + + /* Enable nesting in CPUID */ + vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); + + /* Setup of a region of guest memory for the vmxon region. */ + vmxon_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0); + vmxon_paddr = addr_gva2gpa(vm, vmxon_vaddr); + + /* Setup of a region of guest memory for a vmcs. */ + vmcs_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0); + vmcs_paddr = addr_gva2gpa(vm, vmcs_vaddr); + + vcpu_args_set(vm, vcpuid, 4, vmxon_vaddr, vmxon_paddr, vmcs_vaddr, + vmcs_paddr); + + return vm; +} + +void prepare_for_vmx_operation(void) +{ + uint64_t feature_control; + uint64_t required; + unsigned long cr0; + unsigned long cr4; + + /* + * Ensure bits in CR0 and CR4 are valid in VMX operation: + * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx. + * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx. + */ + __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory"); + cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1); + cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0); + __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory"); + + __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory"); + cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1); + cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0); + /* Enable VMX operation */ + cr4 |= X86_CR4_VMXE; + __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory"); + + /* + * Configure IA32_FEATURE_CONTROL MSR to allow VMXON: + * Bit 0: Lock bit. If clear, VMXON causes a #GP. + * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON + * outside of SMX causes a #GP. + */ + required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + required |= FEATURE_CONTROL_LOCKED; + feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); + if ((feature_control & required) != required) + wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required); +} + +/* + * Initialize the control fields to the most basic settings possible. + */ +static inline void init_vmcs_control_fields(void) +{ + vmwrite(VIRTUAL_PROCESSOR_ID, 0); + vmwrite(POSTED_INTR_NV, 0); + + vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PINBASED_CTLS)); + vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PROCBASED_CTLS)); + vmwrite(EXCEPTION_BITMAP, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ + vmwrite(CR3_TARGET_COUNT, 0); + vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) | + VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */ + vmwrite(VM_EXIT_MSR_STORE_COUNT, 0); + vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) | + VM_ENTRY_IA32E_MODE); /* 64-bit guest */ + vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0); + vmwrite(TPR_THRESHOLD, 0); + vmwrite(SECONDARY_VM_EXEC_CONTROL, 0); + + vmwrite(CR0_GUEST_HOST_MASK, 0); + vmwrite(CR4_GUEST_HOST_MASK, 0); + vmwrite(CR0_READ_SHADOW, get_cr0()); + vmwrite(CR4_READ_SHADOW, get_cr4()); +} + +/* + * Initialize the host state fields based on the current host state, with + * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch + * or vmresume. + */ +static inline void init_vmcs_host_state(void) +{ + uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS); + + vmwrite(HOST_ES_SELECTOR, get_es()); + vmwrite(HOST_CS_SELECTOR, get_cs()); + vmwrite(HOST_SS_SELECTOR, get_ss()); + vmwrite(HOST_DS_SELECTOR, get_ds()); + vmwrite(HOST_FS_SELECTOR, get_fs()); + vmwrite(HOST_GS_SELECTOR, get_gs()); + vmwrite(HOST_TR_SELECTOR, get_tr()); + + if (exit_controls & VM_EXIT_LOAD_IA32_PAT) + vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT)); + if (exit_controls & VM_EXIT_LOAD_IA32_EFER) + vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER)); + if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) + vmwrite(HOST_IA32_PERF_GLOBAL_CTRL, + rdmsr(MSR_CORE_PERF_GLOBAL_CTRL)); + + vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS)); + + vmwrite(HOST_CR0, get_cr0()); + vmwrite(HOST_CR3, get_cr3()); + vmwrite(HOST_CR4, get_cr4()); + vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE)); + vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE)); + vmwrite(HOST_TR_BASE, + get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr()))); + vmwrite(HOST_GDTR_BASE, get_gdt_base()); + vmwrite(HOST_IDTR_BASE, get_idt_base()); + vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP)); + vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP)); +} + +/* + * Initialize the guest state fields essentially as a clone of + * the host state fields. Some host state fields have fixed + * values, and we set the corresponding guest state fields accordingly. + */ +static inline void init_vmcs_guest_state(void *rip, void *rsp) +{ + vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR)); + vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR)); + vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR)); + vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR)); + vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR)); + vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR)); + vmwrite(GUEST_LDTR_SELECTOR, 0); + vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR)); + vmwrite(GUEST_INTR_STATUS, 0); + vmwrite(GUEST_PML_INDEX, 0); + + vmwrite(VMCS_LINK_POINTER, -1ll); + vmwrite(GUEST_IA32_DEBUGCTL, 0); + vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT)); + vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER)); + vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL, + vmreadz(HOST_IA32_PERF_GLOBAL_CTRL)); + + vmwrite(GUEST_ES_LIMIT, -1); + vmwrite(GUEST_CS_LIMIT, -1); + vmwrite(GUEST_SS_LIMIT, -1); + vmwrite(GUEST_DS_LIMIT, -1); + vmwrite(GUEST_FS_LIMIT, -1); + vmwrite(GUEST_GS_LIMIT, -1); + vmwrite(GUEST_LDTR_LIMIT, -1); + vmwrite(GUEST_TR_LIMIT, 0x67); + vmwrite(GUEST_GDTR_LIMIT, 0xffff); + vmwrite(GUEST_IDTR_LIMIT, 0xffff); + vmwrite(GUEST_ES_AR_BYTES, + vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_CS_AR_BYTES, 0xa09b); + vmwrite(GUEST_SS_AR_BYTES, 0xc093); + vmwrite(GUEST_DS_AR_BYTES, + vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_FS_AR_BYTES, + vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_GS_AR_BYTES, + vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_LDTR_AR_BYTES, 0x10000); + vmwrite(GUEST_TR_AR_BYTES, 0x8b); + vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); + vmwrite(GUEST_ACTIVITY_STATE, 0); + vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS)); + vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0); + + vmwrite(GUEST_CR0, vmreadz(HOST_CR0)); + vmwrite(GUEST_CR3, vmreadz(HOST_CR3)); + vmwrite(GUEST_CR4, vmreadz(HOST_CR4)); + vmwrite(GUEST_ES_BASE, 0); + vmwrite(GUEST_CS_BASE, 0); + vmwrite(GUEST_SS_BASE, 0); + vmwrite(GUEST_DS_BASE, 0); + vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE)); + vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE)); + vmwrite(GUEST_LDTR_BASE, 0); + vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE)); + vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE)); + vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE)); + vmwrite(GUEST_DR7, 0x400); + vmwrite(GUEST_RSP, (uint64_t)rsp); + vmwrite(GUEST_RIP, (uint64_t)rip); + vmwrite(GUEST_RFLAGS, 2); + vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0); + vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP)); + vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP)); +} + +void prepare_vmcs(void *guest_rip, void *guest_rsp) +{ + init_vmcs_control_fields(); + init_vmcs_host_state(); + init_vmcs_guest_state(guest_rip, guest_rsp); +} diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/sync_regs_test.c index 428e9473f5e2..eae1ece3c31b 100644 --- a/tools/testing/selftests/kvm/sync_regs_test.c +++ b/tools/testing/selftests/kvm/sync_regs_test.c @@ -85,6 +85,9 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left, { } +#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS) +#define INVALID_SYNC_FIELD 0x80000000 + int main(int argc, char *argv[]) { struct kvm_vm *vm; @@ -98,9 +101,14 @@ int main(int argc, char *argv[]) setbuf(stdout, NULL); cap = kvm_check_cap(KVM_CAP_SYNC_REGS); - TEST_ASSERT((unsigned long)cap == KVM_SYNC_X86_VALID_FIELDS, - "KVM_CAP_SYNC_REGS (0x%x) != KVM_SYNC_X86_VALID_FIELDS (0x%lx)\n", - cap, KVM_SYNC_X86_VALID_FIELDS); + if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) { + fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n"); + exit(KSFT_SKIP); + } + if ((cap & INVALID_SYNC_FIELD) != 0) { + fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n"); + exit(KSFT_SKIP); + } /* Create VM */ vm = vm_create_default(VCPU_ID, guest_code); @@ -108,7 +116,14 @@ int main(int argc, char *argv[]) run = vcpu_state(vm, VCPU_ID); /* Request reading invalid register set from VCPU. */ - run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS << 1; + run->kvm_valid_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; + + run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(rv < 0 && errno == EINVAL, "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", @@ -116,7 +131,14 @@ int main(int argc, char *argv[]) vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; /* Request setting invalid register set into VCPU. */ - run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS << 1; + run->kvm_dirty_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; + + run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(rv < 0 && errno == EINVAL, "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", @@ -125,7 +147,7 @@ int main(int argc, char *argv[]) /* Request and verify all valid register sets. */ /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */ - run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_valid_regs = TEST_SYNC_FIELDS; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", @@ -146,7 +168,7 @@ int main(int argc, char *argv[]) run->s.regs.sregs.apic_base = 1 << 11; /* TODO run->s.regs.events.XYZ = ABC; */ - run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, @@ -172,7 +194,7 @@ int main(int argc, char *argv[]) /* Clear kvm_dirty_regs bits, verify new s.regs values are * overwritten with existing guest values. */ - run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = 0; run->s.regs.regs.r11 = 0xDEADBEEF; rv = _vcpu_run(vm, VCPU_ID); @@ -211,7 +233,7 @@ int main(int argc, char *argv[]) * with kvm_sync_regs values. */ run->kvm_valid_regs = 0; - run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_dirty_regs = TEST_SYNC_FIELDS; run->s.regs.regs.r11 = 0xBBBB; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c new file mode 100644 index 000000000000..aaa633263b2c --- /dev/null +++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c @@ -0,0 +1,231 @@ +/* + * gtests/tests/vmx_tsc_adjust_test.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * + * IA32_TSC_ADJUST test + * + * According to the SDM, "if an execution of WRMSR to the + * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC, + * the logical processor also adds (or subtracts) value X from the + * IA32_TSC_ADJUST MSR. + * + * Note that when L1 doesn't intercept writes to IA32_TSC, a + * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC + * value. + * + * This test verifies that this unusual case is handled correctly. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "x86.h" +#include "vmx.h" + +#include <string.h> +#include <sys/ioctl.h> + +#ifndef MSR_IA32_TSC_ADJUST +#define MSR_IA32_TSC_ADJUST 0x3b +#endif + +#define PAGE_SIZE 4096 +#define VCPU_ID 5 + +#define TSC_ADJUST_VALUE (1ll << 32) +#define TSC_OFFSET_VALUE -(1ll << 48) + +enum { + PORT_ABORT = 0x1000, + PORT_REPORT, + PORT_DONE, +}; + +struct vmx_page { + vm_vaddr_t virt; + vm_paddr_t phys; +}; + +enum { + VMXON_PAGE = 0, + VMCS_PAGE, + MSR_BITMAP_PAGE, + + NUM_VMX_PAGES, +}; + +struct kvm_single_msr { + struct kvm_msrs header; + struct kvm_msr_entry entry; +} __attribute__((packed)); + +/* The virtual machine object. */ +static struct kvm_vm *vm; + +/* Array of vmx_page descriptors that is shared with the guest. */ +struct vmx_page *vmx_pages; + +#define exit_to_l0(_port, _arg) do_exit_to_l0(_port, (unsigned long) (_arg)) +static void do_exit_to_l0(uint16_t port, unsigned long arg) +{ + __asm__ __volatile__("in %[port], %%al" + : + : [port]"d"(port), "D"(arg) + : "rax"); +} + + +#define GUEST_ASSERT(_condition) do { \ + if (!(_condition)) \ + exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition); \ +} while (0) + +static void check_ia32_tsc_adjust(int64_t max) +{ + int64_t adjust; + + adjust = rdmsr(MSR_IA32_TSC_ADJUST); + exit_to_l0(PORT_REPORT, adjust); + GUEST_ASSERT(adjust <= max); +} + +static void l2_guest_code(void) +{ + uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE; + + wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE); + check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); + + /* Exit to L1 */ + __asm__ __volatile__("vmcall"); +} + +static void l1_guest_code(struct vmx_page *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint32_t control; + uintptr_t save_cr3; + + GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); + wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); + check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); + + prepare_for_vmx_operation(); + + /* Enter VMX root operation. */ + *(uint32_t *)vmx_pages[VMXON_PAGE].virt = vmcs_revision(); + GUEST_ASSERT(!vmxon(vmx_pages[VMXON_PAGE].phys)); + + /* Load a VMCS. */ + *(uint32_t *)vmx_pages[VMCS_PAGE].virt = vmcs_revision(); + GUEST_ASSERT(!vmclear(vmx_pages[VMCS_PAGE].phys)); + GUEST_ASSERT(!vmptrld(vmx_pages[VMCS_PAGE].phys)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + vmwrite(MSR_BITMAP, vmx_pages[MSR_BITMAP_PAGE].phys); + vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); + + /* Jump into L2. First, test failure to load guest CR3. */ + save_cr3 = vmreadz(GUEST_CR3); + vmwrite(GUEST_CR3, -1ull); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == + (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); + check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); + vmwrite(GUEST_CR3, save_cr3); + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); + + exit_to_l0(PORT_DONE, 0); +} + +static void allocate_vmx_page(struct vmx_page *page) +{ + vm_vaddr_t virt; + + virt = vm_vaddr_alloc(vm, PAGE_SIZE, 0, 0, 0); + memset(addr_gva2hva(vm, virt), 0, PAGE_SIZE); + + page->virt = virt; + page->phys = addr_gva2gpa(vm, virt); +} + +static vm_vaddr_t allocate_vmx_pages(void) +{ + vm_vaddr_t vmx_pages_vaddr; + int i; + + vmx_pages_vaddr = vm_vaddr_alloc( + vm, sizeof(struct vmx_page) * NUM_VMX_PAGES, 0, 0, 0); + + vmx_pages = (void *) addr_gva2hva(vm, vmx_pages_vaddr); + + for (i = 0; i < NUM_VMX_PAGES; i++) + allocate_vmx_page(&vmx_pages[i]); + + return vmx_pages_vaddr; +} + +void report(int64_t val) +{ + printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n", + val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_vaddr; + struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + + if (!(entry->ecx & CPUID_VMX)) { + fprintf(stderr, "nested VMX not enabled, skipping test\n"); + exit(KSFT_SKIP); + } + + vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code); + + /* Allocate VMX pages and shared descriptors (vmx_pages). */ + vmx_pages_vaddr = allocate_vmx_pages(); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_vaddr); + + for (;;) { + volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_regs regs; + + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + vcpu_regs_get(vm, VCPU_ID, ®s); + + switch (run->io.port) { + case PORT_ABORT: + TEST_ASSERT(false, "%s", (const char *) regs.rdi); + /* NOT REACHED */ + case PORT_REPORT: + report(regs.rdi); + break; + case PORT_DONE: + goto done; + default: + TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port); + } + } + + kvm_vm_free(vm); +done: + return 0; +} diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 195e9d4739a9..c1b1a4dc6a96 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -20,10 +20,10 @@ all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) .ONESHELL: define RUN_TESTS - @export KSFT_TAP_LEVEL=`echo 1`; - @test_num=`echo 0`; - @echo "TAP version 13"; - @for TEST in $(1); do \ + @export KSFT_TAP_LEVEL=`echo 1`; \ + test_num=`echo 0`; \ + echo "TAP version 13"; \ + for TEST in $(1); do \ BASENAME_TEST=`basename $$TEST`; \ test_num=`echo $$test_num+1 | bc`; \ echo "selftests: $$BASENAME_TEST"; \ diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 785fc18a16b4..3ff81a478dbe 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -6,6 +6,7 @@ CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh +TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 6a75a3ea44ad..7ba089b33e8b 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -7,3 +7,8 @@ CONFIG_NET_L3_MASTER_DEV=y CONFIG_IPV6=y CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_VETH=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_NET_IPVTI=y +CONFIG_INET6_XFRM_MODE_TUNNEL=y +CONFIG_IPV6_VTI=y +CONFIG_DUMMY=y diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c index 365c32e84189..c9f478b40996 100644 --- a/tools/testing/selftests/net/reuseport_bpf_numa.c +++ b/tools/testing/selftests/net/reuseport_bpf_numa.c @@ -23,6 +23,8 @@ #include <unistd.h> #include <numa.h> +#include "../kselftest.h" + static const int PORT = 8888; static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto) @@ -229,7 +231,7 @@ int main(void) int *rcv_fd, nodes; if (numa_available() < 0) - error(1, errno, "no numa api support"); + ksft_exit_skip("no numa api support\n"); nodes = numa_max_node() + 1; diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c index e38ad6d94d4b..fcff7047000d 100644 --- a/tools/testing/selftests/proc/proc-loadavg-001.c +++ b/tools/testing/selftests/proc/proc-loadavg-001.c @@ -1,5 +1,5 @@ /* - * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com> + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/tools/testing/selftests/proc/proc-self-map-files-001.c b/tools/testing/selftests/proc/proc-self-map-files-001.c index af1d0a6af810..4209c64283d6 100644 --- a/tools/testing/selftests/proc/proc-self-map-files-001.c +++ b/tools/testing/selftests/proc/proc-self-map-files-001.c @@ -1,5 +1,5 @@ /* - * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com> + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c index aebf4be56111..6f1f4a6e1ecb 100644 --- a/tools/testing/selftests/proc/proc-self-map-files-002.c +++ b/tools/testing/selftests/proc/proc-self-map-files-002.c @@ -1,5 +1,5 @@ /* - * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com> + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c index 05eb6f91f1e9..5ab5f4810e43 100644 --- a/tools/testing/selftests/proc/proc-self-syscall.c +++ b/tools/testing/selftests/proc/proc-self-syscall.c @@ -1,3 +1,18 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ #define _GNU_SOURCE #include <unistd.h> #include <sys/syscall.h> diff --git a/tools/testing/selftests/proc/proc-self-wchan.c b/tools/testing/selftests/proc/proc-self-wchan.c index b8d8728a6869..a38b2fbaa7ad 100644 --- a/tools/testing/selftests/proc/proc-self-wchan.c +++ b/tools/testing/selftests/proc/proc-self-wchan.c @@ -1,3 +1,18 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> diff --git a/tools/testing/selftests/proc/proc-uptime-001.c b/tools/testing/selftests/proc/proc-uptime-001.c index 303f26092306..781f7a50fc3f 100644 --- a/tools/testing/selftests/proc/proc-uptime-001.c +++ b/tools/testing/selftests/proc/proc-uptime-001.c @@ -1,5 +1,5 @@ /* - * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com> + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c index 0cb79e1f1674..30e2b7849089 100644 --- a/tools/testing/selftests/proc/proc-uptime-002.c +++ b/tools/testing/selftests/proc/proc-uptime-002.c @@ -1,5 +1,5 @@ /* - * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com> + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h index d584419f50a7..0e464b50e9d9 100644 --- a/tools/testing/selftests/proc/proc-uptime.h +++ b/tools/testing/selftests/proc/proc-uptime.h @@ -1,5 +1,5 @@ /* - * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com> + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c index 12e397f78592..1e73c2232097 100644 --- a/tools/testing/selftests/proc/read.c +++ b/tools/testing/selftests/proc/read.c @@ -1,5 +1,5 @@ /* - * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com> + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 168c66d74fc5..e1473234968d 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -134,11 +134,15 @@ struct seccomp_data { #endif #ifndef SECCOMP_FILTER_FLAG_TSYNC -#define SECCOMP_FILTER_FLAG_TSYNC 1 +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) #endif #ifndef SECCOMP_FILTER_FLAG_LOG -#define SECCOMP_FILTER_FLAG_LOG 2 +#define SECCOMP_FILTER_FLAG_LOG (1UL << 1) +#endif + +#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) #endif #ifndef PTRACE_SECCOMP_GET_METADATA @@ -2072,14 +2076,26 @@ TEST(seccomp_syscall_mode_lock) TEST(detect_seccomp_filter_flags) { unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, - SECCOMP_FILTER_FLAG_LOG }; + SECCOMP_FILTER_FLAG_LOG, + SECCOMP_FILTER_FLAG_SPEC_ALLOW }; unsigned int flag, all_flags; int i; long ret; /* Test detection of known-good filter flags */ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { + int bits = 0; + flag = flags[i]; + /* Make sure the flag is a single bit! */ + while (flag) { + if (flag & 0x1) + bits ++; + flag >>= 1; + } + ASSERT_EQ(1, bits); + flag = flags[i]; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); ASSERT_NE(ENOSYS, errno) { TH_LOG("Kernel does not support seccomp syscall!"); diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 5b012f4981d4..6f289a49e5ec 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -66,7 +66,7 @@ "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667", "expExitCode": "0", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c default-action pipe.*index 667 ref", + "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c( jited)? default-action pipe.*index 667 ref", "matchCount": "1", "teardown": [ "$TC action flush action bpf", @@ -92,10 +92,15 @@ "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667", "expExitCode": "255", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9].*index 667 ref", + "matchPattern": "action order [0-9]*: bpf _c.o:\\[action\\] id [0-9].*index 667 ref", "matchCount": "0", "teardown": [ - "$TC action flush action bpf", + [ + "$TC action flush action bpf", + 0, + 1, + 255 + ], "rm -f _c.o" ] }, diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index d744991c0f4f..39f66bc29b82 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -11,7 +11,7 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ - protection_keys test_vdso test_vsyscall + protection_keys test_vdso test_vsyscall mov_ss_trap TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c new file mode 100644 index 000000000000..3c3a022654f3 --- /dev/null +++ b/tools/testing/selftests/x86/mov_ss_trap.c @@ -0,0 +1,285 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS + * + * This does MOV SS from a watchpointed address followed by various + * types of kernel entries. A MOV SS that hits a watchpoint will queue + * up a #DB trap but will not actually deliver that trap. The trap + * will be delivered after the next instruction instead. The CPU's logic + * seems to be: + * + * - Any fault: drop the pending #DB trap. + * - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then + * deliver #DB. + * - ICEBP: enter the kernel but do not deliver the watchpoint trap + * - breakpoint: only one #DB is delivered (phew!) + * + * There are plenty of ways for a kernel to handle this incorrectly. This + * test tries to exercise all the cases. + * + * This should mostly cover CVE-2018-1087 and CVE-2018-8897. + */ +#define _GNU_SOURCE + +#include <stdlib.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/user.h> +#include <sys/syscall.h> +#include <unistd.h> +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <err.h> +#include <string.h> +#include <setjmp.h> +#include <sys/prctl.h> + +#define X86_EFLAGS_RF (1UL << 16) + +#if __x86_64__ +# define REG_IP REG_RIP +#else +# define REG_IP REG_EIP +#endif + +unsigned short ss; +extern unsigned char breakpoint_insn[]; +sigjmp_buf jmpbuf; +static unsigned char altstack_data[SIGSTKSZ]; + +static void enable_watchpoint(void) +{ + pid_t parent = getpid(); + int status; + + pid_t child = fork(); + if (child < 0) + err(1, "fork"); + + if (child) { + if (waitpid(child, &status, 0) != child) + err(1, "waitpid for child"); + } else { + unsigned long dr0, dr1, dr7; + + dr0 = (unsigned long)&ss; + dr1 = (unsigned long)breakpoint_insn; + dr7 = ((1UL << 1) | /* G0 */ + (3UL << 16) | /* RW0 = read or write */ + (1UL << 18) | /* LEN0 = 2 bytes */ + (1UL << 3)); /* G1, RW1 = insn */ + + if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0) + err(1, "PTRACE_ATTACH"); + + if (waitpid(parent, &status, 0) != parent) + err(1, "waitpid for child"); + + if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0) + err(1, "PTRACE_POKEUSER DR0"); + + if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0) + err(1, "PTRACE_POKEUSER DR1"); + + if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0) + err(1, "PTRACE_POKEUSER DR7"); + + printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7); + + if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0) + err(1, "PTRACE_DETACH"); + + exit(0); + } +} + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static char const * const signames[] = { + [SIGSEGV] = "SIGSEGV", + [SIGBUS] = "SIBGUS", + [SIGTRAP] = "SIGTRAP", + [SIGILL] = "SIGILL", +}; + +static void sigtrap(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = ctx_void; + + printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n", + (unsigned long)ctx->uc_mcontext.gregs[REG_IP], + !!(ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF)); +} + +static void handle_and_return(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = ctx_void; + + printf("\tGot %s with RIP=%lx\n", signames[sig], + (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); +} + +static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = ctx_void; + + printf("\tGot %s with RIP=%lx\n", signames[sig], + (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); + + siglongjmp(jmpbuf, 1); +} + +int main() +{ + unsigned long nr; + + asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss)); + printf("\tSS = 0x%hx, &SS = 0x%p\n", ss, &ss); + + if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0) + printf("\tPR_SET_PTRACER_ANY succeeded\n"); + + printf("\tSet up a watchpoint\n"); + sethandler(SIGTRAP, sigtrap, 0); + enable_watchpoint(); + + printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n"); + asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; INT3\n"); + asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; INT 3\n"); + asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; CS CS INT3\n"); + asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; CSx14 INT3\n"); + asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; INT 4\n"); + sethandler(SIGSEGV, handle_and_return, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss)); + +#ifdef __i386__ + printf("[RUN]\tMOV SS; INTO\n"); + sethandler(SIGSEGV, handle_and_return, SA_RESETHAND); + nr = -1; + asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into" + : [tmp] "+r" (nr) : [ss] "m" (ss)); +#endif + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; ICEBP\n"); + + /* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */ + sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND); + + asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss)); + } + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; CLI\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss)); + } + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; #PF\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]" + : [tmp] "=r" (nr) : [ss] "m" (ss)); + } + + /* + * INT $1: if #DB has DPL=3 and there isn't special handling, + * then the kernel will die. + */ + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; INT 1\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss)); + } + +#ifdef __x86_64__ + /* + * In principle, we should test 32-bit SYSCALL as well, but + * the calling convention is so unpredictable that it's + * not obviously worth the effort. + */ + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; SYSCALL\n"); + sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND); + nr = SYS_getpid; + /* + * Toggle the high bit of RSP to make it noncanonical to + * strengthen this test on non-SMAP systems. + */ + asm volatile ("btc $63, %%rsp\n\t" + "mov %[ss], %%ss; syscall\n\t" + "btc $63, %%rsp" + : "+a" (nr) : [ss] "m" (ss) + : "rcx" +#ifdef __x86_64__ + , "r11" +#endif + ); + } +#endif + + printf("[RUN]\tMOV SS; breakpointed NOP\n"); + asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss)); + + /* + * Invoking SYSENTER directly breaks all the rules. Just handle + * the SIGSEGV. + */ + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; SYSENTER\n"); + stack_t stack = { + .ss_sp = altstack_data, + .ss_size = SIGSTKSZ, + }; + if (sigaltstack(&stack, NULL) != 0) + err(1, "sigaltstack"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK); + nr = SYS_getpid; + asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr) + : [ss] "m" (ss) : "flags", "rcx" +#ifdef __x86_64__ + , "r11" +#endif + ); + + /* We're unreachable here. SYSENTER forgets RIP. */ + } + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; INT $0x80\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + nr = 20; /* compat getpid */ + asm volatile ("mov %[ss], %%ss; int $0x80" + : "+a" (nr) : [ss] "m" (ss) + : "flags" +#ifdef __x86_64__ + , "r8", "r9", "r10", "r11" +#endif + ); + } + + printf("[OK]\tI aten't dead\n"); + return 0; +} diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index 9c0325e1ea68..50f7e9272481 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -368,6 +368,11 @@ static int expected_bnd_index = -1; uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */ unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS]; +/* Failed address bound checks: */ +#ifndef SEGV_BNDERR +# define SEGV_BNDERR 3 +#endif + /* * The kernel is supposed to provide some information about the bounds * exception in the siginfo. It should match what we have in the bounds @@ -419,8 +424,6 @@ void handler(int signum, siginfo_t *si, void *vucontext) br_count++; dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count); -#define SEGV_BNDERR 3 /* failed address bound checks */ - dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n", status, ip, br_reason); dprintf2("si_signo: %d\n", si->si_signo); diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h index b3cb7670e026..254e5436bdd9 100644 --- a/tools/testing/selftests/x86/pkey-helpers.h +++ b/tools/testing/selftests/x86/pkey-helpers.h @@ -26,30 +26,26 @@ static inline void sigsafe_printf(const char *format, ...) { va_list ap; - va_start(ap, format); if (!dprint_in_signal) { + va_start(ap, format); vprintf(format, ap); + va_end(ap); } else { int ret; - int len = vsnprintf(dprint_in_signal_buffer, - DPRINT_IN_SIGNAL_BUF_SIZE, - format, ap); /* - * len is amount that would have been printed, - * but actual write is truncated at BUF_SIZE. + * No printf() functions are signal-safe. + * They deadlock easily. Write the format + * string to get some output, even if + * incomplete. */ - if (len > DPRINT_IN_SIGNAL_BUF_SIZE) - len = DPRINT_IN_SIGNAL_BUF_SIZE; - ret = write(1, dprint_in_signal_buffer, len); + ret = write(1, format, strlen(format)); if (ret < 0) - abort(); + exit(1); } - va_end(ap); } #define dprintf_level(level, args...) do { \ if (level <= DEBUG_LEVEL) \ sigsafe_printf(args); \ - fflush(NULL); \ } while (0) #define dprintf0(args...) dprintf_level(0, args) #define dprintf1(args...) dprintf_level(1, args) diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index f15aa5a76fe3..460b4bdf4c1e 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -72,10 +72,9 @@ extern void abort_hooks(void); test_nr, iteration_nr); \ dprintf0("errno at assert: %d", errno); \ abort_hooks(); \ - assert(condition); \ + exit(__LINE__); \ } \ } while (0) -#define raw_assert(cond) assert(cond) void cat_into_file(char *str, char *file) { @@ -87,12 +86,17 @@ void cat_into_file(char *str, char *file) * these need to be raw because they are called under * pkey_assert() */ - raw_assert(fd >= 0); + if (fd < 0) { + fprintf(stderr, "error opening '%s'\n", str); + perror("error: "); + exit(__LINE__); + } + ret = write(fd, str, strlen(str)); if (ret != strlen(str)) { perror("write to file failed"); fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); - raw_assert(0); + exit(__LINE__); } close(fd); } @@ -191,26 +195,30 @@ void lots_o_noops_around_write(int *write_to_me) #ifdef __i386__ #ifndef SYS_mprotect_key -# define SYS_mprotect_key 380 +# define SYS_mprotect_key 380 #endif + #ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 381 -# define SYS_pkey_free 382 +# define SYS_pkey_alloc 381 +# define SYS_pkey_free 382 #endif -#define REG_IP_IDX REG_EIP -#define si_pkey_offset 0x14 + +#define REG_IP_IDX REG_EIP +#define si_pkey_offset 0x14 #else #ifndef SYS_mprotect_key -# define SYS_mprotect_key 329 +# define SYS_mprotect_key 329 #endif + #ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 330 -# define SYS_pkey_free 331 +# define SYS_pkey_alloc 330 +# define SYS_pkey_free 331 #endif -#define REG_IP_IDX REG_RIP -#define si_pkey_offset 0x20 + +#define REG_IP_IDX REG_RIP +#define si_pkey_offset 0x20 #endif @@ -225,8 +233,14 @@ void dump_mem(void *dumpme, int len_bytes) } } -#define SEGV_BNDERR 3 /* failed address bound checks */ -#define SEGV_PKUERR 4 +/* Failed address bound checks: */ +#ifndef SEGV_BNDERR +# define SEGV_BNDERR 3 +#endif + +#ifndef SEGV_PKUERR +# define SEGV_PKUERR 4 +#endif static char *si_code_str(int si_code) { @@ -289,13 +303,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) dump_mem(pkru_ptr - 128, 256); pkey_assert(*pkru_ptr); - si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); - dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); - dump_mem(si_pkey_ptr - 8, 24); - siginfo_pkey = *si_pkey_ptr; - pkey_assert(siginfo_pkey < NR_PKEYS); - last_si_pkey = siginfo_pkey; - if ((si->si_code == SEGV_MAPERR) || (si->si_code == SEGV_ACCERR) || (si->si_code == SEGV_BNDERR)) { @@ -303,6 +310,13 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) exit(4); } + si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); + dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); + dump_mem((u8 *)si_pkey_ptr - 8, 24); + siginfo_pkey = *si_pkey_ptr; + pkey_assert(siginfo_pkey < NR_PKEYS); + last_si_pkey = siginfo_pkey; + dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); /* need __rdpkru() version so we do not do shadow_pkru checking */ dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); @@ -311,22 +325,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); pkru_faults++; dprintf1("<<<<==================================================\n"); - return; - if (trapno == 14) { - fprintf(stderr, - "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", - trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(1); - } else { - fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(2); - } dprint_in_signal = 0; } @@ -393,10 +391,15 @@ pid_t fork_lazy_child(void) return forkret; } -#define PKEY_DISABLE_ACCESS 0x1 -#define PKEY_DISABLE_WRITE 0x2 +#ifndef PKEY_DISABLE_ACCESS +# define PKEY_DISABLE_ACCESS 0x1 +#endif + +#ifndef PKEY_DISABLE_WRITE +# define PKEY_DISABLE_WRITE 0x2 +#endif -u32 pkey_get(int pkey, unsigned long flags) +static u32 hw_pkey_get(int pkey, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); u32 pkru = __rdpkru(); @@ -418,7 +421,7 @@ u32 pkey_get(int pkey, unsigned long flags) return masked_pkru; } -int pkey_set(int pkey, unsigned long rights, unsigned long flags) +static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); u32 old_pkru = __rdpkru(); @@ -452,15 +455,15 @@ void pkey_disable_set(int pkey, int flags) pkey, flags); pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - pkey_rights = pkey_get(pkey, syscall_flags); + pkey_rights = hw_pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0); pkey_rights |= flags; - ret = pkey_set(pkey, pkey_rights, syscall_flags); + ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); assert(!ret); /*pkru and flags have the same format */ shadow_pkru |= flags << (pkey * 2); @@ -468,8 +471,8 @@ void pkey_disable_set(int pkey, int flags) pkey_assert(ret >= 0); - pkey_rights = pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey_rights = hw_pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); @@ -483,24 +486,24 @@ void pkey_disable_clear(int pkey, int flags) { unsigned long syscall_flags = 0; int ret; - int pkey_rights = pkey_get(pkey, syscall_flags); + int pkey_rights = hw_pkey_get(pkey, syscall_flags); u32 orig_pkru = rdpkru(); pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0); pkey_rights |= flags; - ret = pkey_set(pkey, pkey_rights, 0); + ret = hw_pkey_set(pkey, pkey_rights, 0); /* pkru and flags have the same format */ shadow_pkru &= ~(flags << (pkey * 2)); pkey_assert(ret >= 0); - pkey_rights = pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey_rights = hw_pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); @@ -674,10 +677,12 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, struct pkey_malloc_record { void *ptr; long size; + int prot; }; struct pkey_malloc_record *pkey_malloc_records; +struct pkey_malloc_record *pkey_last_malloc_record; long nr_pkey_malloc_records; -void record_pkey_malloc(void *ptr, long size) +void record_pkey_malloc(void *ptr, long size, int prot) { long i; struct pkey_malloc_record *rec = NULL; @@ -709,6 +714,8 @@ void record_pkey_malloc(void *ptr, long size) (int)(rec - pkey_malloc_records), rec, ptr, size); rec->ptr = ptr; rec->size = size; + rec->prot = prot; + pkey_last_malloc_record = rec; nr_pkey_malloc_records++; } @@ -753,7 +760,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) pkey_assert(ptr != (void *)-1); ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); pkey_assert(!ret); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); rdpkru(); dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); @@ -774,7 +781,7 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) size = ALIGN_UP(size, HPAGE_SIZE * 2); ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); pkey_assert(ptr != (void *)-1); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); mprotect_pkey(ptr, size, prot, pkey); dprintf1("unaligned ptr: %p\n", ptr); @@ -847,7 +854,7 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) pkey_assert(ptr != (void *)-1); mprotect_pkey(ptr, size, prot, pkey); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); return ptr; @@ -869,7 +876,7 @@ void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) mprotect_pkey(ptr, size, prot, pkey); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); close(fd); @@ -918,13 +925,21 @@ void *malloc_pkey(long size, int prot, u16 pkey) } int last_pkru_faults; +#define UNKNOWN_PKEY -2 void expected_pk_fault(int pkey) { dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", __func__, last_pkru_faults, pkru_faults); dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); pkey_assert(last_pkru_faults + 1 == pkru_faults); - pkey_assert(last_si_pkey == pkey); + + /* + * For exec-only memory, we do not know the pkey in + * advance, so skip this check. + */ + if (pkey != UNKNOWN_PKEY) + pkey_assert(last_si_pkey == pkey); + /* * The signal handler shold have cleared out PKRU to let the * test program continue. We now have to restore it. @@ -939,10 +954,11 @@ void expected_pk_fault(int pkey) last_si_pkey = -1; } -void do_not_expect_pk_fault(void) -{ - pkey_assert(last_pkru_faults == pkru_faults); -} +#define do_not_expect_pk_fault(msg) do { \ + if (last_pkru_faults != pkru_faults) \ + dprintf0("unexpected PK fault: %s\n", msg); \ + pkey_assert(last_pkru_faults == pkru_faults); \ +} while (0) int test_fds[10] = { -1 }; int nr_test_fds; @@ -1151,12 +1167,15 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) pkey_assert(i < NR_PKEYS*2); /* - * There are 16 pkeys supported in hardware. One is taken - * up for the default (0) and another can be taken up by - * an execute-only mapping. Ensure that we can allocate - * at least 14 (16-2). + * There are 16 pkeys supported in hardware. Three are + * allocated by the time we get here: + * 1. The default key (0) + * 2. One possibly consumed by an execute-only mapping. + * 3. One allocated by the test code and passed in via + * 'pkey' to this function. + * Ensure that we can allocate at least another 13 (16-3). */ - pkey_assert(i >= NR_PKEYS-2); + pkey_assert(i >= NR_PKEYS-3); for (i = 0; i < nr_allocated_pkeys; i++) { err = sys_pkey_free(allocated_pkeys[i]); @@ -1165,6 +1184,35 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) } } +/* + * pkey 0 is special. It is allocated by default, so you do not + * have to call pkey_alloc() to use it first. Make sure that it + * is usable. + */ +void test_mprotect_with_pkey_0(int *ptr, u16 pkey) +{ + long size; + int prot; + + assert(pkey_last_malloc_record); + size = pkey_last_malloc_record->size; + /* + * This is a bit of a hack. But mprotect() requires + * huge-page-aligned sizes when operating on hugetlbfs. + * So, make sure that we use something that's a multiple + * of a huge page when we can. + */ + if (size >= HPAGE_SIZE) + size = HPAGE_SIZE; + prot = pkey_last_malloc_record->prot; + + /* Use pkey 0 */ + mprotect_pkey(ptr, size, prot, 0); + + /* Make sure that we can set it back to the original pkey. */ + mprotect_pkey(ptr, size, prot, pkey); +} + void test_ptrace_of_child(int *ptr, u16 pkey) { __attribute__((__unused__)) int peek_result; @@ -1228,7 +1276,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) pkey_assert(ret != -1); /* Now access from the current task, and expect NO exception: */ peek_result = read_ptr(plain_ptr); - do_not_expect_pk_fault(); + do_not_expect_pk_fault("read plain pointer after ptrace"); ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); pkey_assert(ret != -1); @@ -1241,12 +1289,9 @@ void test_ptrace_of_child(int *ptr, u16 pkey) free(plain_ptr_unaligned); } -void test_executing_on_unreadable_memory(int *ptr, u16 pkey) +void *get_pointer_to_instructions(void) { void *p1; - int scratch; - int ptr_contents; - int ret; p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); @@ -1256,7 +1301,23 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) /* Point 'p1' at the *second* page of the function: */ p1 += PAGE_SIZE; + /* + * Try to ensure we fault this in on next touch to ensure + * we get an instruction fault as opposed to a data one + */ madvise(p1, PAGE_SIZE, MADV_DONTNEED); + + return p1; +} + +void test_executing_on_unreadable_memory(int *ptr, u16 pkey) +{ + void *p1; + int scratch; + int ptr_contents; + int ret; + + p1 = get_pointer_to_instructions(); lots_o_noops_around_write(&scratch); ptr_contents = read_ptr(p1); dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); @@ -1272,12 +1333,55 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) */ madvise(p1, PAGE_SIZE, MADV_DONTNEED); lots_o_noops_around_write(&scratch); - do_not_expect_pk_fault(); + do_not_expect_pk_fault("executing on PROT_EXEC memory"); ptr_contents = read_ptr(p1); dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); expected_pk_fault(pkey); } +void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) +{ + void *p1; + int scratch; + int ptr_contents; + int ret; + + dprintf1("%s() start\n", __func__); + + p1 = get_pointer_to_instructions(); + lots_o_noops_around_write(&scratch); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + + /* Use a *normal* mprotect(), not mprotect_pkey(): */ + ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); + pkey_assert(!ret); + + dprintf2("pkru: %x\n", rdpkru()); + + /* Make sure this is an *instruction* fault */ + madvise(p1, PAGE_SIZE, MADV_DONTNEED); + lots_o_noops_around_write(&scratch); + do_not_expect_pk_fault("executing on PROT_EXEC memory"); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + expected_pk_fault(UNKNOWN_PKEY); + + /* + * Put the memory back to non-PROT_EXEC. Should clear the + * exec-only pkey off the VMA and allow it to be readable + * again. Go to PROT_NONE first to check for a kernel bug + * that did not clear the pkey when doing PROT_NONE. + */ + ret = mprotect(p1, PAGE_SIZE, PROT_NONE); + pkey_assert(!ret); + + ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC); + pkey_assert(!ret); + ptr_contents = read_ptr(p1); + do_not_expect_pk_fault("plain read on recently PROT_EXEC area"); +} + void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) { int size = PAGE_SIZE; @@ -1302,6 +1406,8 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { test_kernel_gup_of_access_disabled_region, test_kernel_gup_write_to_write_disabled_region, test_executing_on_unreadable_memory, + test_implicit_mprotect_exec_only_memory, + test_mprotect_with_pkey_0, test_ptrace_of_child, test_pkey_syscalls_on_non_allocated_pkey, test_pkey_syscalls_bad_args, diff --git a/tools/testing/selftests/x86/test_syscall_vdso.c b/tools/testing/selftests/x86/test_syscall_vdso.c index 40370354d4c1..c9c3281077bc 100644 --- a/tools/testing/selftests/x86/test_syscall_vdso.c +++ b/tools/testing/selftests/x86/test_syscall_vdso.c @@ -100,12 +100,19 @@ asm ( " shl $32, %r8\n" " orq $0x7f7f7f7f, %r8\n" " movq %r8, %r9\n" - " movq %r8, %r10\n" - " movq %r8, %r11\n" - " movq %r8, %r12\n" - " movq %r8, %r13\n" - " movq %r8, %r14\n" - " movq %r8, %r15\n" + " incq %r9\n" + " movq %r9, %r10\n" + " incq %r10\n" + " movq %r10, %r11\n" + " incq %r11\n" + " movq %r11, %r12\n" + " incq %r12\n" + " movq %r12, %r13\n" + " incq %r13\n" + " movq %r13, %r14\n" + " incq %r14\n" + " movq %r14, %r15\n" + " incq %r15\n" " ret\n" " .code32\n" " .popsection\n" @@ -128,12 +135,13 @@ int check_regs64(void) int err = 0; int num = 8; uint64_t *r64 = ®s64.r8; + uint64_t expected = 0x7f7f7f7f7f7f7f7fULL; if (!kernel_is_64bit) return 0; do { - if (*r64 == 0x7f7f7f7f7f7f7f7fULL) + if (*r64 == expected++) continue; /* register did not change */ if (syscall_addr != (long)&int80) { /* @@ -147,18 +155,17 @@ int check_regs64(void) continue; } } else { - /* INT80 syscall entrypoint can be used by + /* + * INT80 syscall entrypoint can be used by * 64-bit programs too, unlike SYSCALL/SYSENTER. * Therefore it must preserve R12+ * (they are callee-saved registers in 64-bit C ABI). * - * This was probably historically not intended, - * but R8..11 are clobbered (cleared to 0). - * IOW: they are the only registers which aren't - * preserved across INT80 syscall. + * Starting in Linux 4.17 (and any kernel that + * backports the change), R8..11 are preserved. + * Historically (and probably unintentionally), they + * were clobbered or zeroed. */ - if (*r64 == 0 && num <= 11) - continue; } printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64); err++; |