diff options
Diffstat (limited to 'tools/testing')
61 files changed, 4135 insertions, 534 deletions
diff --git a/tools/testing/ktest/config-bisect.pl b/tools/testing/ktest/config-bisect.pl new file mode 100755 index 000000000000..b28feea7c363 --- /dev/null +++ b/tools/testing/ktest/config-bisect.pl @@ -0,0 +1,770 @@ +#!/usr/bin/perl -w +# +# Copyright 2015 - Steven Rostedt, Red Hat Inc. +# Copyright 2017 - Steven Rostedt, VMware, Inc. +# +# Licensed under the terms of the GNU GPL License version 2 +# + +# usage: +# config-bisect.pl [options] good-config bad-config [good|bad] +# + +# Compares a good config to a bad config, then takes half of the diffs +# and produces a config that is somewhere between the good config and +# the bad config. That is, the resulting config will start with the +# good config and will try to make half of the differences of between +# the good and bad configs match the bad config. It tries because of +# dependencies between the two configs it may not be able to change +# exactly half of the configs that are different between the two config +# files. + +# Here's a normal way to use it: +# +# $ cd /path/to/linux/kernel +# $ config-bisect.pl /path/to/good/config /path/to/bad/config + +# This will now pull in good config (blowing away .config in that directory +# so do not make that be one of the good or bad configs), and then +# build the config with "make oldconfig" to make sure it matches the +# current kernel. It will then store the configs in that result for +# the good config. It does the same for the bad config as well. +# The algorithm will run, merging half of the differences between +# the two configs and building them with "make oldconfig" to make sure +# the result changes (dependencies may reset changes the tool had made). +# It then copies the result of its good config to /path/to/good/config.tmp +# and the bad config to /path/to/bad/config.tmp (just appends ".tmp" to the +# files passed in). And the ".config" that you should test will be in +# directory + +# After the first run, determine if the result is good or bad then +# run the same command appending the result + +# For good results: +# $ config-bisect.pl /path/to/good/config /path/to/bad/config good + +# For bad results: +# $ config-bisect.pl /path/to/good/config /path/to/bad/config bad + +# Do not change the good-config or bad-config, config-bisect.pl will +# copy the good-config to a temp file with the same name as good-config +# but with a ".tmp" after it. It will do the same with the bad-config. + +# If "good" or "bad" is not stated at the end, it will copy the good and +# bad configs to the .tmp versions. If a .tmp version already exists, it will +# warn before writing over them (-r will not warn, and just write over them). +# If the last config is labeled "good", then it will copy it to the good .tmp +# version. If the last config is labeled "bad", it will copy it to the bad +# .tmp version. It will continue this until it can not merge the two any more +# without the result being equal to either the good or bad .tmp configs. + +my $start = 0; +my $val = ""; + +my $pwd = `pwd`; +chomp $pwd; +my $tree = $pwd; +my $build; + +my $output_config; +my $reset_bisect; + +sub usage { + print << "EOF" + +usage: config-bisect.pl [-l linux-tree][-b build-dir] good-config bad-config [good|bad] + -l [optional] define location of linux-tree (default is current directory) + -b [optional] define location to build (O=build-dir) (default is linux-tree) + good-config the config that is considered good + bad-config the config that does not work + "good" add this if the last run produced a good config + "bad" add this if the last run produced a bad config + If "good" or "bad" is not specified, then it is the start of a new bisect + + Note, each run will create copy of good and bad configs with ".tmp" appended. + +EOF +; + + exit(-1); +} + +sub doprint { + print @_; +} + +sub dodie { + doprint "CRITICAL FAILURE... ", @_, "\n"; + + die @_, "\n"; +} + +sub expand_path { + my ($file) = @_; + + if ($file =~ m,^/,) { + return $file; + } + return "$pwd/$file"; +} + +sub read_prompt { + my ($cancel, $prompt) = @_; + + my $ans; + + for (;;) { + if ($cancel) { + print "$prompt [y/n/C] "; + } else { + print "$prompt [y/N] "; + } + $ans = <STDIN>; + chomp $ans; + if ($ans =~ /^\s*$/) { + if ($cancel) { + $ans = "c"; + } else { + $ans = "n"; + } + } + last if ($ans =~ /^y$/i || $ans =~ /^n$/i); + if ($cancel) { + last if ($ans =~ /^c$/i); + print "Please answer either 'y', 'n' or 'c'.\n"; + } else { + print "Please answer either 'y' or 'n'.\n"; + } + } + if ($ans =~ /^c/i) { + exit; + } + if ($ans !~ /^y$/i) { + return 0; + } + return 1; +} + +sub read_yn { + my ($prompt) = @_; + + return read_prompt 0, $prompt; +} + +sub read_ync { + my ($prompt) = @_; + + return read_prompt 1, $prompt; +} + +sub run_command { + my ($command, $redirect) = @_; + my $start_time; + my $end_time; + my $dord = 0; + my $pid; + + $start_time = time; + + doprint("$command ... "); + + $pid = open(CMD, "$command 2>&1 |") or + dodie "unable to exec $command"; + + if (defined($redirect)) { + open (RD, ">$redirect") or + dodie "failed to write to redirect $redirect"; + $dord = 1; + } + + while (<CMD>) { + print RD if ($dord); + } + + waitpid($pid, 0); + my $failed = $?; + + close(CMD); + close(RD) if ($dord); + + $end_time = time; + my $delta = $end_time - $start_time; + + if ($delta == 1) { + doprint "[1 second] "; + } else { + doprint "[$delta seconds] "; + } + + if ($failed) { + doprint "FAILED!\n"; + } else { + doprint "SUCCESS\n"; + } + + return !$failed; +} + +###### CONFIG BISECT ###### + +# config_ignore holds the configs that were set (or unset) for +# a good config and we will ignore these configs for the rest +# of a config bisect. These configs stay as they were. +my %config_ignore; + +# config_set holds what all configs were set as. +my %config_set; + +# config_off holds the set of configs that the bad config had disabled. +# We need to record them and set them in the .config when running +# olddefconfig, because olddefconfig keeps the defaults. +my %config_off; + +# config_off_tmp holds a set of configs to turn off for now +my @config_off_tmp; + +# config_list is the set of configs that are being tested +my %config_list; +my %null_config; + +my %dependency; + +my $make; + +sub make_oldconfig { + + if (!run_command "$make olddefconfig") { + # Perhaps olddefconfig doesn't exist in this version of the kernel + # try oldnoconfig + doprint "olddefconfig failed, trying make oldnoconfig\n"; + if (!run_command "$make oldnoconfig") { + doprint "oldnoconfig failed, trying yes '' | make oldconfig\n"; + # try a yes '' | oldconfig + run_command "yes '' | $make oldconfig" or + dodie "failed make config oldconfig"; + } + } +} + +sub assign_configs { + my ($hash, $config) = @_; + + doprint "Reading configs from $config\n"; + + open (IN, $config) + or dodie "Failed to read $config"; + + while (<IN>) { + chomp; + if (/^((CONFIG\S*)=.*)/) { + ${$hash}{$2} = $1; + } elsif (/^(# (CONFIG\S*) is not set)/) { + ${$hash}{$2} = $1; + } + } + + close(IN); +} + +sub process_config_ignore { + my ($config) = @_; + + assign_configs \%config_ignore, $config; +} + +sub get_dependencies { + my ($config) = @_; + + my $arr = $dependency{$config}; + if (!defined($arr)) { + return (); + } + + my @deps = @{$arr}; + + foreach my $dep (@{$arr}) { + print "ADD DEP $dep\n"; + @deps = (@deps, get_dependencies $dep); + } + + return @deps; +} + +sub save_config { + my ($pc, $file) = @_; + + my %configs = %{$pc}; + + doprint "Saving configs into $file\n"; + + open(OUT, ">$file") or dodie "Can not write to $file"; + + foreach my $config (keys %configs) { + print OUT "$configs{$config}\n"; + } + close(OUT); +} + +sub create_config { + my ($name, $pc) = @_; + + doprint "Creating old config from $name configs\n"; + + save_config $pc, $output_config; + + make_oldconfig; +} + +# compare two config hashes, and return configs with different vals. +# It returns B's config values, but you can use A to see what A was. +sub diff_config_vals { + my ($pa, $pb) = @_; + + # crappy Perl way to pass in hashes. + my %a = %{$pa}; + my %b = %{$pb}; + + my %ret; + + foreach my $item (keys %a) { + if (defined($b{$item}) && $b{$item} ne $a{$item}) { + $ret{$item} = $b{$item}; + } + } + + return %ret; +} + +# compare two config hashes and return the configs in B but not A +sub diff_configs { + my ($pa, $pb) = @_; + + my %ret; + + # crappy Perl way to pass in hashes. + my %a = %{$pa}; + my %b = %{$pb}; + + foreach my $item (keys %b) { + if (!defined($a{$item})) { + $ret{$item} = $b{$item}; + } + } + + return %ret; +} + +# return if two configs are equal or not +# 0 is equal +1 b has something a does not +# +1 if a and b have a different item. +# -1 if a has something b does not +sub compare_configs { + my ($pa, $pb) = @_; + + my %ret; + + # crappy Perl way to pass in hashes. + my %a = %{$pa}; + my %b = %{$pb}; + + foreach my $item (keys %b) { + if (!defined($a{$item})) { + return 1; + } + if ($a{$item} ne $b{$item}) { + return 1; + } + } + + foreach my $item (keys %a) { + if (!defined($b{$item})) { + return -1; + } + } + + return 0; +} + +sub process_failed { + my ($config) = @_; + + doprint "\n\n***************************************\n"; + doprint "Found bad config: $config\n"; + doprint "***************************************\n\n"; +} + +sub process_new_config { + my ($tc, $nc, $gc, $bc) = @_; + + my %tmp_config = %{$tc}; + my %good_configs = %{$gc}; + my %bad_configs = %{$bc}; + + my %new_configs; + + my $runtest = 1; + my $ret; + + create_config "tmp_configs", \%tmp_config; + assign_configs \%new_configs, $output_config; + + $ret = compare_configs \%new_configs, \%bad_configs; + if (!$ret) { + doprint "New config equals bad config, try next test\n"; + $runtest = 0; + } + + if ($runtest) { + $ret = compare_configs \%new_configs, \%good_configs; + if (!$ret) { + doprint "New config equals good config, try next test\n"; + $runtest = 0; + } + } + + %{$nc} = %new_configs; + + return $runtest; +} + +sub convert_config { + my ($config) = @_; + + if ($config =~ /^# (.*) is not set/) { + $config = "$1=n"; + } + + $config =~ s/^CONFIG_//; + return $config; +} + +sub print_config { + my ($sym, $config) = @_; + + $config = convert_config $config; + doprint "$sym$config\n"; +} + +sub print_config_compare { + my ($good_config, $bad_config) = @_; + + $good_config = convert_config $good_config; + $bad_config = convert_config $bad_config; + + my $good_value = $good_config; + my $bad_value = $bad_config; + $good_value =~ s/(.*)=//; + my $config = $1; + + $bad_value =~ s/.*=//; + + doprint " $config $good_value -> $bad_value\n"; +} + +# Pass in: +# $phalf: half of the configs names you want to add +# $oconfigs: The orginial configs to start with +# $sconfigs: The source to update $oconfigs with (from $phalf) +# $which: The name of which half that is updating (top / bottom) +# $type: The name of the source type (good / bad) +sub make_half { + my ($phalf, $oconfigs, $sconfigs, $which, $type) = @_; + + my @half = @{$phalf}; + my %orig_configs = %{$oconfigs}; + my %source_configs = %{$sconfigs}; + + my %tmp_config = %orig_configs; + + doprint "Settings bisect with $which half of $type configs:\n"; + foreach my $item (@half) { + doprint "Updating $item to $source_configs{$item}\n"; + $tmp_config{$item} = $source_configs{$item}; + } + + return %tmp_config; +} + +sub run_config_bisect { + my ($pgood, $pbad) = @_; + + my %good_configs = %{$pgood}; + my %bad_configs = %{$pbad}; + + my %diff_configs = diff_config_vals \%good_configs, \%bad_configs; + my %b_configs = diff_configs \%good_configs, \%bad_configs; + my %g_configs = diff_configs \%bad_configs, \%good_configs; + + # diff_arr is what is in both good and bad but are different (y->n) + my @diff_arr = keys %diff_configs; + my $len_diff = $#diff_arr + 1; + + # b_arr is what is in bad but not in good (has depends) + my @b_arr = keys %b_configs; + my $len_b = $#b_arr + 1; + + # g_arr is what is in good but not in bad + my @g_arr = keys %g_configs; + my $len_g = $#g_arr + 1; + + my $runtest = 0; + my %new_configs; + my $ret; + + # Look at the configs that are different between good and bad. + # This does not include those that depend on other configs + # (configs depending on other configs that are not set would + # not show up even as a "# CONFIG_FOO is not set" + + + doprint "# of configs to check: $len_diff\n"; + doprint "# of configs showing only in good: $len_g\n"; + doprint "# of configs showing only in bad: $len_b\n"; + + if ($len_diff > 0) { + # Now test for different values + + doprint "Configs left to check:\n"; + doprint " Good Config\t\t\tBad Config\n"; + doprint " -----------\t\t\t----------\n"; + foreach my $item (@diff_arr) { + doprint " $good_configs{$item}\t$bad_configs{$item}\n"; + } + + my $half = int($#diff_arr / 2); + my @tophalf = @diff_arr[0 .. $half]; + + doprint "Set tmp config to be good config with some bad config values\n"; + + my %tmp_config = make_half \@tophalf, \%good_configs, + \%bad_configs, "top", "bad"; + + $runtest = process_new_config \%tmp_config, \%new_configs, + \%good_configs, \%bad_configs; + + if (!$runtest) { + doprint "Set tmp config to be bad config with some good config values\n"; + + my %tmp_config = make_half \@tophalf, \%bad_configs, + \%good_configs, "top", "good"; + + $runtest = process_new_config \%tmp_config, \%new_configs, + \%good_configs, \%bad_configs; + } + } + + if (!$runtest && $len_diff > 0) { + # do the same thing, but this time with bottom half + + my $half = int($#diff_arr / 2); + my @bottomhalf = @diff_arr[$half+1 .. $#diff_arr]; + + doprint "Set tmp config to be good config with some bad config values\n"; + + my %tmp_config = make_half \@bottomhalf, \%good_configs, + \%bad_configs, "bottom", "bad"; + + $runtest = process_new_config \%tmp_config, \%new_configs, + \%good_configs, \%bad_configs; + + if (!$runtest) { + doprint "Set tmp config to be bad config with some good config values\n"; + + my %tmp_config = make_half \@bottomhalf, \%bad_configs, + \%good_configs, "bottom", "good"; + + $runtest = process_new_config \%tmp_config, \%new_configs, + \%good_configs, \%bad_configs; + } + } + + if ($runtest) { + make_oldconfig; + doprint "READY TO TEST .config IN $build\n"; + return 0; + } + + doprint "\n%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n"; + doprint "Hmm, can't make any more changes without making good == bad?\n"; + doprint "Difference between good (+) and bad (-)\n"; + + foreach my $item (keys %bad_configs) { + if (!defined($good_configs{$item})) { + print_config "-", $bad_configs{$item}; + } + } + + foreach my $item (keys %good_configs) { + next if (!defined($bad_configs{$item})); + if ($good_configs{$item} ne $bad_configs{$item}) { + print_config_compare $good_configs{$item}, $bad_configs{$item}; + } + } + + foreach my $item (keys %good_configs) { + if (!defined($bad_configs{$item})) { + print_config "+", $good_configs{$item}; + } + } + return -1; +} + +sub config_bisect { + my ($good_config, $bad_config) = @_; + my $ret; + + my %good_configs; + my %bad_configs; + my %tmp_configs; + + doprint "Run good configs through make oldconfig\n"; + assign_configs \%tmp_configs, $good_config; + create_config "$good_config", \%tmp_configs; + assign_configs \%good_configs, $output_config; + + doprint "Run bad configs through make oldconfig\n"; + assign_configs \%tmp_configs, $bad_config; + create_config "$bad_config", \%tmp_configs; + assign_configs \%bad_configs, $output_config; + + save_config \%good_configs, $good_config; + save_config \%bad_configs, $bad_config; + + return run_config_bisect \%good_configs, \%bad_configs; +} + +while ($#ARGV >= 0) { + if ($ARGV[0] !~ m/^-/) { + last; + } + my $opt = shift @ARGV; + + if ($opt eq "-b") { + $val = shift @ARGV; + if (!defined($val)) { + die "-b requires value\n"; + } + $build = $val; + } + + elsif ($opt eq "-l") { + $val = shift @ARGV; + if (!defined($val)) { + die "-l requires value\n"; + } + $tree = $val; + } + + elsif ($opt eq "-r") { + $reset_bisect = 1; + } + + elsif ($opt eq "-h") { + usage; + } + + else { + die "Unknow option $opt\n"; + } +} + +$build = $tree if (!defined($build)); + +$tree = expand_path $tree; +$build = expand_path $build; + +if ( ! -d $tree ) { + die "$tree not a directory\n"; +} + +if ( ! -d $build ) { + die "$build not a directory\n"; +} + +usage if $#ARGV < 1; + +if ($#ARGV == 1) { + $start = 1; +} elsif ($#ARGV == 2) { + $val = $ARGV[2]; + if ($val ne "good" && $val ne "bad") { + die "Unknown command '$val', bust be either \"good\" or \"bad\"\n"; + } +} else { + usage; +} + +my $good_start = expand_path $ARGV[0]; +my $bad_start = expand_path $ARGV[1]; + +my $good = "$good_start.tmp"; +my $bad = "$bad_start.tmp"; + +$make = "make"; + +if ($build ne $tree) { + $make = "make O=$build" +} + +$output_config = "$build/.config"; + +if ($start) { + if ( ! -f $good_start ) { + die "$good_start not found\n"; + } + if ( ! -f $bad_start ) { + die "$bad_start not found\n"; + } + if ( -f $good || -f $bad ) { + my $p = ""; + + if ( -f $good ) { + $p = "$good exists\n"; + } + + if ( -f $bad ) { + $p = "$p$bad exists\n"; + } + + if (!defined($reset_bisect)) { + if (!read_yn "${p}Overwrite and start new bisect anyway?") { + exit (-1); + } + } + } + run_command "cp $good_start $good" or die "failed to copy to $good\n"; + run_command "cp $bad_start $bad" or die "faield to copy to $bad\n"; +} else { + if ( ! -f $good ) { + die "Can not find file $good\n"; + } + if ( ! -f $bad ) { + die "Can not find file $bad\n"; + } + if ($val eq "good") { + run_command "cp $output_config $good" or die "failed to copy $config to $good\n"; + } elsif ($val eq "bad") { + run_command "cp $output_config $bad" or die "failed to copy $config to $bad\n"; + } +} + +chdir $tree || die "can't change directory to $tree"; + +my $ret = config_bisect $good, $bad; + +if (!$ret) { + exit(0); +} + +if ($ret > 0) { + doprint "Cleaning temp files\n"; + run_command "rm $good"; + run_command "rm $bad"; + exit(1); +} else { + doprint "See good and bad configs for details:\n"; + doprint "good: $good\n"; + doprint "bad: $bad\n"; + doprint "%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n"; +} +exit(2); diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 8809f244bb7c..87af8a68ab25 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -10,6 +10,7 @@ use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK); use File::Path qw(mkpath); use File::Copy qw(cp); use FileHandle; +use FindBin; my $VERSION = "0.2"; @@ -22,6 +23,11 @@ my %evals; #default opts my %default = ( + "MAILER" => "sendmail", # default mailer + "EMAIL_ON_ERROR" => 1, + "EMAIL_WHEN_FINISHED" => 1, + "EMAIL_WHEN_CANCELED" => 0, + "EMAIL_WHEN_STARTED" => 0, "NUM_TESTS" => 1, "TEST_TYPE" => "build", "BUILD_TYPE" => "randconfig", @@ -59,6 +65,7 @@ my %default = ( "GRUB_REBOOT" => "grub2-reboot", "SYSLINUX" => "extlinux", "SYSLINUX_PATH" => "/boot/extlinux", + "CONNECT_TIMEOUT" => 25, # required, and we will ask users if they don't have them but we keep the default # value something that is common. @@ -163,6 +170,8 @@ my $store_failures; my $store_successes; my $test_name; my $timeout; +my $connect_timeout; +my $config_bisect_exec; my $booted_timeout; my $detect_triplefault; my $console; @@ -204,6 +213,20 @@ my $install_time; my $reboot_time; my $test_time; +my $pwd; +my $dirname = $FindBin::Bin; + +my $mailto; +my $mailer; +my $mail_path; +my $mail_command; +my $email_on_error; +my $email_when_finished; +my $email_when_started; +my $email_when_canceled; + +my $script_start_time = localtime(); + # set when a test is something other that just building or install # which would require more options. my $buildonly = 1; @@ -229,6 +252,14 @@ my $no_reboot = 1; my $reboot_success = 0; my %option_map = ( + "MAILTO" => \$mailto, + "MAILER" => \$mailer, + "MAIL_PATH" => \$mail_path, + "MAIL_COMMAND" => \$mail_command, + "EMAIL_ON_ERROR" => \$email_on_error, + "EMAIL_WHEN_FINISHED" => \$email_when_finished, + "EMAIL_WHEN_STARTED" => \$email_when_started, + "EMAIL_WHEN_CANCELED" => \$email_when_canceled, "MACHINE" => \$machine, "SSH_USER" => \$ssh_user, "TMP_DIR" => \$tmpdir, @@ -296,6 +327,8 @@ my %option_map = ( "STORE_SUCCESSES" => \$store_successes, "TEST_NAME" => \$test_name, "TIMEOUT" => \$timeout, + "CONNECT_TIMEOUT" => \$connect_timeout, + "CONFIG_BISECT_EXEC" => \$config_bisect_exec, "BOOTED_TIMEOUT" => \$booted_timeout, "CONSOLE" => \$console, "CLOSE_CONSOLE_SIGNAL" => \$close_console_signal, @@ -337,6 +370,7 @@ my %used_options; # default variables that can be used chomp ($variable{"PWD"} = `pwd`); +$pwd = $variable{"PWD"}; $config_help{"MACHINE"} = << "EOF" The machine hostname that you will test. @@ -718,22 +752,14 @@ sub set_value { my $prvalue = process_variables($rvalue); - if ($buildonly && $lvalue =~ /^TEST_TYPE(\[.*\])?$/ && $prvalue ne "build") { + if ($lvalue =~ /^(TEST|BISECT|CONFIG_BISECT)_TYPE(\[.*\])?$/ && + $prvalue !~ /^(config_|)bisect$/ && + $prvalue !~ /^build$/ && + $buildonly) { + # Note if a test is something other than build, then we # will need other mandatory options. if ($prvalue ne "install") { - # for bisect, we need to check BISECT_TYPE - if ($prvalue ne "bisect") { - $buildonly = 0; - } - } else { - # install still limits some mandatory options. - $buildonly = 2; - } - } - - if ($buildonly && $lvalue =~ /^BISECT_TYPE(\[.*\])?$/ && $prvalue ne "build") { - if ($prvalue ne "install") { $buildonly = 0; } else { # install still limits some mandatory options. @@ -1140,7 +1166,8 @@ sub __read_config { sub get_test_case { print "What test case would you like to run?\n"; print " (build, install or boot)\n"; - print " Other tests are available but require editing the config file\n"; + print " Other tests are available but require editing ktest.conf\n"; + print " (see tools/testing/ktest/sample.conf)\n"; my $ans = <STDIN>; chomp $ans; $default{"TEST_TYPE"} = $ans; @@ -1328,8 +1355,8 @@ sub reboot { my ($time) = @_; my $powercycle = 0; - # test if the machine can be connected to within 5 seconds - my $stat = run_ssh("echo check machine status", 5); + # test if the machine can be connected to within a few seconds + my $stat = run_ssh("echo check machine status", $connect_timeout); if (!$stat) { doprint("power cycle\n"); $powercycle = 1; @@ -1404,10 +1431,18 @@ sub do_not_reboot { return $test_type eq "build" || $no_reboot || ($test_type eq "patchcheck" && $opt{"PATCHCHECK_TYPE[$i]"} eq "build") || - ($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build"); + ($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build") || + ($test_type eq "config_bisect" && $opt{"CONFIG_BISECT_TYPE[$i]"} eq "build"); } +my $in_die = 0; + sub dodie { + + # avoid recusion + return if ($in_die); + $in_die = 1; + doprint "CRITICAL FAILURE... ", @_, "\n"; my $i = $iteration; @@ -1426,6 +1461,11 @@ sub dodie { print " See $opt{LOG_FILE} for more info.\n"; } + if ($email_on_error) { + send_email("KTEST: critical failure for your [$test_type] test", + "Your test started at $script_start_time has failed with:\n@_\n"); + } + if ($monitor_cnt) { # restore terminal settings system("stty $stty_orig"); @@ -1477,7 +1517,7 @@ sub exec_console { close($pts); exec $console or - die "Can't open console $console"; + dodie "Can't open console $console"; } sub open_console { @@ -1515,6 +1555,9 @@ sub close_console { doprint "kill child process $pid\n"; kill $close_console_signal, $pid; + doprint "wait for child process $pid to exit\n"; + waitpid($pid, 0); + print "closing!\n"; close($fp); @@ -1625,7 +1668,7 @@ sub save_logs { if (!-d $dir) { mkpath($dir) or - die "can't create $dir"; + dodie "can't create $dir"; } my %files = ( @@ -1638,7 +1681,7 @@ sub save_logs { while (my ($name, $source) = each(%files)) { if (-f "$source") { cp "$source", "$dir/$name" or - die "failed to copy $source"; + dodie "failed to copy $source"; } } @@ -1692,6 +1735,7 @@ sub run_command { my $end_time; my $dolog = 0; my $dord = 0; + my $dostdout = 0; my $pid; $command =~ s/\$SSH_USER/$ssh_user/g; @@ -1710,9 +1754,15 @@ sub run_command { } if (defined($redirect)) { - open (RD, ">$redirect") or - dodie "failed to write to redirect $redirect"; - $dord = 1; + if ($redirect eq 1) { + $dostdout = 1; + # Have the output of the command on its own line + doprint "\n"; + } else { + open (RD, ">$redirect") or + dodie "failed to write to redirect $redirect"; + $dord = 1; + } } my $hit_timeout = 0; @@ -1734,6 +1784,7 @@ sub run_command { } print LOG $line if ($dolog); print RD $line if ($dord); + print $line if ($dostdout); } waitpid($pid, 0); @@ -1812,7 +1863,7 @@ sub get_grub2_index { $ssh_grub =~ s,\$SSH_COMMAND,cat $grub_file,g; open(IN, "$ssh_grub |") - or die "unable to get $grub_file"; + or dodie "unable to get $grub_file"; my $found = 0; @@ -1821,13 +1872,13 @@ sub get_grub2_index { $grub_number++; $found = 1; last; - } elsif (/^menuentry\s/) { + } elsif (/^menuentry\s|^submenu\s/) { $grub_number++; } } close(IN); - die "Could not find '$grub_menu' in $grub_file on $machine" + dodie "Could not find '$grub_menu' in $grub_file on $machine" if (!$found); doprint "$grub_number\n"; $last_grub_menu = $grub_menu; @@ -1855,7 +1906,7 @@ sub get_grub_index { $ssh_grub =~ s,\$SSH_COMMAND,cat /boot/grub/menu.lst,g; open(IN, "$ssh_grub |") - or die "unable to get menu.lst"; + or dodie "unable to get menu.lst"; my $found = 0; @@ -1870,7 +1921,7 @@ sub get_grub_index { } close(IN); - die "Could not find '$grub_menu' in /boot/grub/menu on $machine" + dodie "Could not find '$grub_menu' in /boot/grub/menu on $machine" if (!$found); doprint "$grub_number\n"; $last_grub_menu = $grub_menu; @@ -1983,7 +2034,7 @@ sub monitor { my $full_line = ""; open(DMESG, "> $dmesg") or - die "unable to write to $dmesg"; + dodie "unable to write to $dmesg"; reboot_to; @@ -2862,7 +2913,7 @@ sub run_bisect { sub update_bisect_replay { my $tmp_log = "$tmpdir/ktest_bisect_log"; run_command "git bisect log > $tmp_log" or - die "can't create bisect log"; + dodie "can't create bisect log"; return $tmp_log; } @@ -2871,9 +2922,9 @@ sub bisect { my $result; - die "BISECT_GOOD[$i] not defined\n" if (!defined($bisect_good)); - die "BISECT_BAD[$i] not defined\n" if (!defined($bisect_bad)); - die "BISECT_TYPE[$i] not defined\n" if (!defined($bisect_type)); + dodie "BISECT_GOOD[$i] not defined\n" if (!defined($bisect_good)); + dodie "BISECT_BAD[$i] not defined\n" if (!defined($bisect_bad)); + dodie "BISECT_TYPE[$i] not defined\n" if (!defined($bisect_type)); my $good = $bisect_good; my $bad = $bisect_bad; @@ -2936,7 +2987,7 @@ sub bisect { if ($check ne "good") { doprint "TESTING BISECT BAD [$bad]\n"; run_command "git checkout $bad" or - die "Failed to checkout $bad"; + dodie "Failed to checkout $bad"; $result = run_bisect $type; @@ -2948,7 +2999,7 @@ sub bisect { if ($check ne "bad") { doprint "TESTING BISECT GOOD [$good]\n"; run_command "git checkout $good" or - die "Failed to checkout $good"; + dodie "Failed to checkout $good"; $result = run_bisect $type; @@ -2959,7 +3010,7 @@ sub bisect { # checkout where we started run_command "git checkout $head" or - die "Failed to checkout $head"; + dodie "Failed to checkout $head"; } run_command "git bisect start$start_files" or @@ -3092,76 +3143,6 @@ sub create_config { make_oldconfig; } -# compare two config hashes, and return configs with different vals. -# It returns B's config values, but you can use A to see what A was. -sub diff_config_vals { - my ($pa, $pb) = @_; - - # crappy Perl way to pass in hashes. - my %a = %{$pa}; - my %b = %{$pb}; - - my %ret; - - foreach my $item (keys %a) { - if (defined($b{$item}) && $b{$item} ne $a{$item}) { - $ret{$item} = $b{$item}; - } - } - - return %ret; -} - -# compare two config hashes and return the configs in B but not A -sub diff_configs { - my ($pa, $pb) = @_; - - my %ret; - - # crappy Perl way to pass in hashes. - my %a = %{$pa}; - my %b = %{$pb}; - - foreach my $item (keys %b) { - if (!defined($a{$item})) { - $ret{$item} = $b{$item}; - } - } - - return %ret; -} - -# return if two configs are equal or not -# 0 is equal +1 b has something a does not -# +1 if a and b have a different item. -# -1 if a has something b does not -sub compare_configs { - my ($pa, $pb) = @_; - - my %ret; - - # crappy Perl way to pass in hashes. - my %a = %{$pa}; - my %b = %{$pb}; - - foreach my $item (keys %b) { - if (!defined($a{$item})) { - return 1; - } - if ($a{$item} ne $b{$item}) { - return 1; - } - } - - foreach my $item (keys %a) { - if (!defined($b{$item})) { - return -1; - } - } - - return 0; -} - sub run_config_bisect_test { my ($type) = @_; @@ -3174,166 +3155,57 @@ sub run_config_bisect_test { return $ret; } -sub process_failed { - my ($config) = @_; +sub config_bisect_end { + my ($good, $bad) = @_; + my $diffexec = "diff -u"; + if (-f "$builddir/scripts/diffconfig") { + $diffexec = "$builddir/scripts/diffconfig"; + } doprint "\n\n***************************************\n"; - doprint "Found bad config: $config\n"; + doprint "No more config bisecting possible.\n"; + run_command "$diffexec $good $bad", 1; doprint "***************************************\n\n"; } -# used for config bisecting -my $good_config; -my $bad_config; - -sub process_new_config { - my ($tc, $nc, $gc, $bc) = @_; - - my %tmp_config = %{$tc}; - my %good_configs = %{$gc}; - my %bad_configs = %{$bc}; - - my %new_configs; - - my $runtest = 1; - my $ret; - - create_config "tmp_configs", \%tmp_config; - assign_configs \%new_configs, $output_config; - - $ret = compare_configs \%new_configs, \%bad_configs; - if (!$ret) { - doprint "New config equals bad config, try next test\n"; - $runtest = 0; - } - - if ($runtest) { - $ret = compare_configs \%new_configs, \%good_configs; - if (!$ret) { - doprint "New config equals good config, try next test\n"; - $runtest = 0; - } - } - - %{$nc} = %new_configs; - - return $runtest; -} - sub run_config_bisect { - my ($pgood, $pbad) = @_; - - my $type = $config_bisect_type; - - my %good_configs = %{$pgood}; - my %bad_configs = %{$pbad}; - - my %diff_configs = diff_config_vals \%good_configs, \%bad_configs; - my %b_configs = diff_configs \%good_configs, \%bad_configs; - my %g_configs = diff_configs \%bad_configs, \%good_configs; - - my @diff_arr = keys %diff_configs; - my $len_diff = $#diff_arr + 1; - - my @b_arr = keys %b_configs; - my $len_b = $#b_arr + 1; - - my @g_arr = keys %g_configs; - my $len_g = $#g_arr + 1; - - my $runtest = 1; - my %new_configs; + my ($good, $bad, $last_result) = @_; + my $reset = ""; + my $cmd; my $ret; - # First, lets get it down to a single subset. - # Is the problem with a difference in values? - # Is the problem with a missing config? - # Is the problem with a config that breaks things? - - # Enable all of one set and see if we get a new bad - # or good config. - - # first set the good config to the bad values. - - doprint "d=$len_diff g=$len_g b=$len_b\n"; - - # first lets enable things in bad config that are enabled in good config - - if ($len_diff > 0) { - if ($len_b > 0 || $len_g > 0) { - my %tmp_config = %bad_configs; - - doprint "Set tmp config to be bad config with good config values\n"; - foreach my $item (@diff_arr) { - $tmp_config{$item} = $good_configs{$item}; - } - - $runtest = process_new_config \%tmp_config, \%new_configs, - \%good_configs, \%bad_configs; - } + if (!length($last_result)) { + $reset = "-r"; } + run_command "$config_bisect_exec $reset -b $outputdir $good $bad $last_result", 1; - if (!$runtest && $len_diff > 0) { - - if ($len_diff == 1) { - process_failed $diff_arr[0]; - return 1; - } - my %tmp_config = %bad_configs; - - my $half = int($#diff_arr / 2); - my @tophalf = @diff_arr[0 .. $half]; - - doprint "Settings bisect with top half:\n"; - doprint "Set tmp config to be bad config with some good config values\n"; - foreach my $item (@tophalf) { - $tmp_config{$item} = $good_configs{$item}; - } - - $runtest = process_new_config \%tmp_config, \%new_configs, - \%good_configs, \%bad_configs; - - if (!$runtest) { - my %tmp_config = %bad_configs; - - doprint "Try bottom half\n"; - - my @bottomhalf = @diff_arr[$half+1 .. $#diff_arr]; - - foreach my $item (@bottomhalf) { - $tmp_config{$item} = $good_configs{$item}; - } - - $runtest = process_new_config \%tmp_config, \%new_configs, - \%good_configs, \%bad_configs; - } + # config-bisect returns: + # 0 if there is more to bisect + # 1 for finding a good config + # 2 if it can not find any more configs + # -1 (255) on error + if ($run_command_status) { + return $run_command_status; } - if ($runtest) { - $ret = run_config_bisect_test $type; - if ($ret) { - doprint "NEW GOOD CONFIG\n"; - %good_configs = %new_configs; - run_command "mv $good_config ${good_config}.last"; - save_config \%good_configs, $good_config; - %{$pgood} = %good_configs; - } else { - doprint "NEW BAD CONFIG\n"; - %bad_configs = %new_configs; - run_command "mv $bad_config ${bad_config}.last"; - save_config \%bad_configs, $bad_config; - %{$pbad} = %bad_configs; - } - return 0; + $ret = run_config_bisect_test $config_bisect_type; + if ($ret) { + doprint "NEW GOOD CONFIG\n"; + # Return 3 for good config + return 3; + } else { + doprint "NEW BAD CONFIG\n"; + # Return 4 for bad config + return 4; } - - fail "Hmm, need to do a mix match?\n"; - return -1; } sub config_bisect { my ($i) = @_; + my $good_config; + my $bad_config; + my $type = $config_bisect_type; my $ret; @@ -3353,6 +3225,24 @@ sub config_bisect { $good_config = $output_config; } + if (!defined($config_bisect_exec)) { + # First check the location that ktest.pl ran + my @locations = ( "$pwd/config-bisect.pl", + "$dirname/config-bisect.pl", + "$builddir/tools/testing/ktest/config-bisect.pl", + undef ); + foreach my $loc (@locations) { + doprint "loc = $loc\n"; + $config_bisect_exec = $loc; + last if (defined($config_bisect_exec && -x $config_bisect_exec)); + } + if (!defined($config_bisect_exec)) { + fail "Could not find an executable config-bisect.pl\n", + " Set CONFIG_BISECT_EXEC to point to config-bisect.pl"; + return 1; + } + } + # we don't want min configs to cause issues here. doprint "Disabling 'MIN_CONFIG' for this test\n"; undef $minconfig; @@ -3361,21 +3251,31 @@ sub config_bisect { my %bad_configs; my %tmp_configs; + if (-f "$tmpdir/good_config.tmp" || -f "$tmpdir/bad_config.tmp") { + if (read_yn "Interrupted config-bisect. Continue (n - will start new)?") { + if (-f "$tmpdir/good_config.tmp") { + $good_config = "$tmpdir/good_config.tmp"; + } else { + $good_config = "$tmpdir/good_config"; + } + if (-f "$tmpdir/bad_config.tmp") { + $bad_config = "$tmpdir/bad_config.tmp"; + } else { + $bad_config = "$tmpdir/bad_config"; + } + } + } doprint "Run good configs through make oldconfig\n"; assign_configs \%tmp_configs, $good_config; create_config "$good_config", \%tmp_configs; - assign_configs \%good_configs, $output_config; + $good_config = "$tmpdir/good_config"; + system("cp $output_config $good_config") == 0 or dodie "cp good config"; doprint "Run bad configs through make oldconfig\n"; assign_configs \%tmp_configs, $bad_config; create_config "$bad_config", \%tmp_configs; - assign_configs \%bad_configs, $output_config; - - $good_config = "$tmpdir/good_config"; $bad_config = "$tmpdir/bad_config"; - - save_config \%good_configs, $good_config; - save_config \%bad_configs, $bad_config; + system("cp $output_config $bad_config") == 0 or dodie "cp bad config"; if (defined($config_bisect_check) && $config_bisect_check ne "0") { if ($config_bisect_check ne "good") { @@ -3398,10 +3298,21 @@ sub config_bisect { } } + my $last_run = ""; + do { - $ret = run_config_bisect \%good_configs, \%bad_configs; + $ret = run_config_bisect $good_config, $bad_config, $last_run; + if ($ret == 3) { + $last_run = "good"; + } elsif ($ret == 4) { + $last_run = "bad"; + } print_times; - } while (!$ret); + } while ($ret == 3 || $ret == 4); + + if ($ret == 2) { + config_bisect_end "$good_config.tmp", "$bad_config.tmp"; + } return $ret if ($ret < 0); @@ -3416,9 +3327,9 @@ sub patchcheck_reboot { sub patchcheck { my ($i) = @_; - die "PATCHCHECK_START[$i] not defined\n" + dodie "PATCHCHECK_START[$i] not defined\n" if (!defined($patchcheck_start)); - die "PATCHCHECK_TYPE[$i] not defined\n" + dodie "PATCHCHECK_TYPE[$i] not defined\n" if (!defined($patchcheck_type)); my $start = $patchcheck_start; @@ -3432,7 +3343,7 @@ sub patchcheck { if (defined($patchcheck_end)) { $end = $patchcheck_end; } elsif ($cherry) { - die "PATCHCHECK_END must be defined with PATCHCHECK_CHERRY\n"; + dodie "PATCHCHECK_END must be defined with PATCHCHECK_CHERRY\n"; } # Get the true sha1's since we can use things like HEAD~3 @@ -3496,7 +3407,7 @@ sub patchcheck { doprint "\nProcessing commit \"$item\"\n\n"; run_command "git checkout $sha1" or - die "Failed to checkout $sha1"; + dodie "Failed to checkout $sha1"; # only clean on the first and last patch if ($item eq $list[0] || @@ -3587,7 +3498,7 @@ sub read_kconfig { } open(KIN, "$kconfig") - or die "Can't open $kconfig"; + or dodie "Can't open $kconfig"; while (<KIN>) { chomp; @@ -3746,7 +3657,7 @@ sub get_depends { $dep =~ s/^[^$valid]*[$valid]+//; } else { - die "this should never happen"; + dodie "this should never happen"; } } @@ -4007,7 +3918,7 @@ sub make_min_config { # update new ignore configs if (defined($ignore_config)) { open (OUT, ">$temp_config") - or die "Can't write to $temp_config"; + or dodie "Can't write to $temp_config"; foreach my $config (keys %save_configs) { print OUT "$save_configs{$config}\n"; } @@ -4035,7 +3946,7 @@ sub make_min_config { # Save off all the current mandatory configs open (OUT, ">$temp_config") - or die "Can't write to $temp_config"; + or dodie "Can't write to $temp_config"; foreach my $config (keys %keep_configs) { print OUT "$keep_configs{$config}\n"; } @@ -4222,6 +4133,74 @@ sub set_test_option { return eval_option($name, $option, $i); } +sub find_mailer { + my ($mailer) = @_; + + my @paths = split /:/, $ENV{PATH}; + + # sendmail is usually in /usr/sbin + $paths[$#paths + 1] = "/usr/sbin"; + + foreach my $path (@paths) { + if (-x "$path/$mailer") { + return $path; + } + } + + return undef; +} + +sub do_send_mail { + my ($subject, $message) = @_; + + if (!defined($mail_path)) { + # find the mailer + $mail_path = find_mailer $mailer; + if (!defined($mail_path)) { + die "\nCan not find $mailer in PATH\n"; + } + } + + if (!defined($mail_command)) { + if ($mailer eq "mail" || $mailer eq "mailx") { + $mail_command = "\$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO <<< \'\$MESSAGE\'"; + } elsif ($mailer eq "sendmail" ) { + $mail_command = "echo \'Subject: \$SUBJECT\n\n\$MESSAGE\' | \$MAIL_PATH/\$MAILER -t \$MAILTO"; + } else { + die "\nYour mailer: $mailer is not supported.\n"; + } + } + + $mail_command =~ s/\$MAILER/$mailer/g; + $mail_command =~ s/\$MAIL_PATH/$mail_path/g; + $mail_command =~ s/\$MAILTO/$mailto/g; + $mail_command =~ s/\$SUBJECT/$subject/g; + $mail_command =~ s/\$MESSAGE/$message/g; + + run_command $mail_command; +} + +sub send_email { + + if (defined($mailto)) { + if (!defined($mailer)) { + doprint "No email sent: email or mailer not specified in config.\n"; + return; + } + do_send_mail @_; + } +} + +sub cancel_test { + if ($email_when_canceled) { + send_email("KTEST: Your [$test_type] test was cancelled", + "Your test started at $script_start_time was cancelled: sig int"); + } + die "\nCaught Sig Int, test interrupted: $!\n" +} + +$SIG{INT} = qw(cancel_test); + # First we need to do is the builds for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { @@ -4245,11 +4224,11 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { $outputdir = set_test_option("OUTPUT_DIR", $i); $builddir = set_test_option("BUILD_DIR", $i); - chdir $builddir || die "can't change directory to $builddir"; + chdir $builddir || dodie "can't change directory to $builddir"; if (!-d $outputdir) { mkpath($outputdir) or - die "can't create $outputdir"; + dodie "can't create $outputdir"; } $make = "$makecmd O=$outputdir"; @@ -4262,9 +4241,15 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { $start_minconfig_defined = 1; # The first test may override the PRE_KTEST option - if (defined($pre_ktest) && $i == 1) { - doprint "\n"; - run_command $pre_ktest; + if ($i == 1) { + if (defined($pre_ktest)) { + doprint "\n"; + run_command $pre_ktest; + } + if ($email_when_started) { + send_email("KTEST: Your [$test_type] test was started", + "Your test was started on $script_start_time"); + } } # Any test can override the POST_KTEST option @@ -4280,7 +4265,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { if (!-d $tmpdir) { mkpath($tmpdir) or - die "can't create $tmpdir"; + dodie "can't create $tmpdir"; } $ENV{"SSH_USER"} = $ssh_user; @@ -4353,7 +4338,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { if (defined($checkout)) { run_command "git checkout $checkout" or - die "failed to checkout $checkout"; + dodie "failed to checkout $checkout"; } $no_reboot = 0; @@ -4428,4 +4413,8 @@ if ($opt{"POWEROFF_ON_SUCCESS"}) { doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n"; +if ($email_when_finished) { + send_email("KTEST: Your [$test_type] test has finished!", + "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!"); +} exit 0; diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf index 6c58cd8bbbae..6ca6ca0ce695 100644 --- a/tools/testing/ktest/sample.conf +++ b/tools/testing/ktest/sample.conf @@ -1,6 +1,11 @@ # # Config file for ktest.pl # +# Place your customized version of this, in the working directory that +# ktest.pl is run from. By default, ktest.pl will look for a file +# called "ktest.conf", but you can name it anything you like and specify +# the name of your config file as the first argument of ktest.pl. +# # Note, all paths must be absolute # @@ -396,6 +401,44 @@ #### Optional Config Options (all have defaults) #### +# Email options for receiving notifications. Users must setup +# the specified mailer prior to using this feature. +# +# (default undefined) +#MAILTO = +# +# Supported mailers: sendmail, mail, mailx +# (default sendmail) +#MAILER = sendmail +# +# The executable to run +# (default: for sendmail "/usr/sbin/sendmail", otherwise equals ${MAILER}) +#MAIL_EXEC = /usr/sbin/sendmail +# +# The command used to send mail, which uses the above options +# can be modified. By default if the mailer is "sendmail" then +# MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO +# For mail or mailx: +# MAIL_COMMAND = "$MAIL_PATH/$MAILER -s \'$SUBJECT\' $MAILTO <<< \'$MESSAGE\' +# ktest.pl will do the substitution for MAIL_PATH, MAILER, MAILTO at the time +# it sends the mail if "$FOO" format is used. If "${FOO}" format is used, +# then the substitutions will occur at the time the config file is read. +# But note, MAIL_PATH and MAILER require being set by the config file if +# ${MAIL_PATH} or ${MAILER} are used, but not if $MAIL_PATH or $MAILER are. +#MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO +# +# Errors are defined as those would terminate the script +# (default 1) +#EMAIL_ON_ERROR = 1 +# (default 1) +#EMAIL_WHEN_FINISHED = 1 +# (default 0) +#EMAIL_WHEN_STARTED = 1 +# +# Users can cancel the test by Ctrl^C +# (default 0) +#EMAIL_WHEN_CANCELED = 1 + # Start a test setup. If you leave this off, all options # will be default and the test will run once. # This is a label and not really an option (it takes no value). @@ -725,6 +768,13 @@ # (default 120) #TIMEOUT = 120 +# The timeout in seconds when to test if the box can be rebooted +# or not. Before issuing the reboot command, a ssh connection +# is attempted to see if the target machine is still active. +# If the target does not connect within this timeout, a power cycle +# is issued instead of a reboot. +# CONNECT_TIMEOUT = 25 + # In between tests, a reboot of the box may occur, and this # is the time to wait for the console after it stops producing # output. Some machines may not produce a large lag on reboot @@ -1167,6 +1217,16 @@ # Set it to "good" to test only the good config and set it # to "bad" to only test the bad config. # +# CONFIG_BISECT_EXEC (optional) +# The config bisect is a separate program that comes with ktest.pl. +# By befault, it will look for: +# `pwd`/config-bisect.pl # the location ktest.pl was executed from. +# If it does not find it there, it will look for: +# `dirname <ktest.pl>`/config-bisect.pl # The directory that holds ktest.pl +# If it does not find it there, it will look for: +# ${BUILD_DIR}/tools/testing/ktest/config-bisect.pl +# Setting CONFIG_BISECT_EXEC will override where it looks. +# # Example: # TEST_START # TEST_TYPE = config_bisect diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 620fa78b3b1b..4ea385be528f 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -104,7 +104,8 @@ enum { NUM_HINTS = 8, NUM_BDW = NUM_DCR, NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW, - NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */, + NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + + 4 /* spa1 iset */ + 1 /* spa11 iset */, DIMM_SIZE = SZ_32M, LABEL_SIZE = SZ_128K, SPA_VCD_SIZE = SZ_4M, @@ -137,6 +138,7 @@ static u32 handle[] = { }; static unsigned long dimm_fail_cmd_flags[NUM_DCR]; +static int dimm_fail_cmd_code[NUM_DCR]; struct nfit_test_fw { enum intel_fw_update_state state; @@ -153,6 +155,7 @@ struct nfit_test { void *nfit_buf; dma_addr_t nfit_dma; size_t nfit_size; + size_t nfit_filled; int dcr_idx; int num_dcr; int num_pm; @@ -709,7 +712,9 @@ static void smart_notify(struct device *bus_dev, >= thresh->media_temperature) || ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP) && smart->ctrl_temperature - >= thresh->ctrl_temperature)) { + >= thresh->ctrl_temperature) + || (smart->health != ND_INTEL_SMART_NON_CRITICAL_HEALTH) + || (smart->shutdown_state != 0)) { device_lock(bus_dev); __acpi_nvdimm_notify(dimm_dev, 0x81); device_unlock(bus_dev); @@ -735,6 +740,32 @@ static int nfit_test_cmd_smart_set_threshold( return 0; } +static int nfit_test_cmd_smart_inject( + struct nd_intel_smart_inject *inj, + unsigned int buf_len, + struct nd_intel_smart_threshold *thresh, + struct nd_intel_smart *smart, + struct device *bus_dev, struct device *dimm_dev) +{ + if (buf_len != sizeof(*inj)) + return -EINVAL; + + if (inj->mtemp_enable) + smart->media_temperature = inj->media_temperature; + if (inj->spare_enable) + smart->spares = inj->spares; + if (inj->fatal_enable) + smart->health = ND_INTEL_SMART_FATAL_HEALTH; + if (inj->unsafe_shutdown_enable) { + smart->shutdown_state = 1; + smart->shutdown_count++; + } + inj->status = 0; + smart_notify(bus_dev, dimm_dev, smart, thresh); + + return 0; +} + static void uc_error_notify(struct work_struct *work) { struct nfit_test *t = container_of(work, typeof(*t), work); @@ -862,8 +893,11 @@ static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) if (i >= ARRAY_SIZE(handle)) return -ENXIO; - if ((1 << func) & dimm_fail_cmd_flags[i]) + if ((1 << func) & dimm_fail_cmd_flags[i]) { + if (dimm_fail_cmd_code[i]) + return dimm_fail_cmd_code[i]; return -EIO; + } return i; } @@ -935,6 +969,13 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, t->dcr_idx], &t->smart[i - t->dcr_idx], &t->pdev.dev, t->dimm_dev[i]); + case ND_INTEL_SMART_INJECT: + return nfit_test_cmd_smart_inject(buf, + buf_len, + &t->smart_threshold[i - + t->dcr_idx], + &t->smart[i - t->dcr_idx], + &t->pdev.dev, t->dimm_dev[i]); default: return -ENOTTY; } @@ -1125,12 +1166,12 @@ static int ars_state_init(struct device *dev, struct ars_state *ars_state) static void put_dimms(void *data) { - struct device **dimm_dev = data; + struct nfit_test *t = data; int i; - for (i = 0; i < NUM_DCR; i++) - if (dimm_dev[i]) - device_unregister(dimm_dev[i]); + for (i = 0; i < t->num_dcr; i++) + if (t->dimm_dev[i]) + device_unregister(t->dimm_dev[i]); } static struct class *nfit_test_dimm; @@ -1139,13 +1180,11 @@ static int dimm_name_to_id(struct device *dev) { int dimm; - if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1 - || dimm >= NUM_DCR || dimm < 0) + if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1) return -ENXIO; return dimm; } - static ssize_t handle_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1154,7 +1193,7 @@ static ssize_t handle_show(struct device *dev, struct device_attribute *attr, if (dimm < 0) return dimm; - return sprintf(buf, "%#x", handle[dimm]); + return sprintf(buf, "%#x\n", handle[dimm]); } DEVICE_ATTR_RO(handle); @@ -1188,8 +1227,39 @@ static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RW(fail_cmd); +static ssize_t fail_cmd_code_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int dimm = dimm_name_to_id(dev); + + if (dimm < 0) + return dimm; + + return sprintf(buf, "%d\n", dimm_fail_cmd_code[dimm]); +} + +static ssize_t fail_cmd_code_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + int dimm = dimm_name_to_id(dev); + unsigned long val; + ssize_t rc; + + if (dimm < 0) + return dimm; + + rc = kstrtol(buf, 0, &val); + if (rc) + return rc; + + dimm_fail_cmd_code[dimm] = val; + return size; +} +static DEVICE_ATTR_RW(fail_cmd_code); + static struct attribute *nfit_test_dimm_attributes[] = { &dev_attr_fail_cmd.attr, + &dev_attr_fail_cmd_code.attr, &dev_attr_handle.attr, NULL, }; @@ -1203,6 +1273,23 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = { NULL, }; +static int nfit_test_dimm_init(struct nfit_test *t) +{ + int i; + + if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t)) + return -ENOMEM; + for (i = 0; i < t->num_dcr; i++) { + t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm, + &t->pdev.dev, 0, NULL, + nfit_test_dimm_attribute_groups, + "test_dimm%d", i + t->dcr_idx); + if (!t->dimm_dev[i]) + return -ENOMEM; + } + return 0; +} + static void smart_init(struct nfit_test *t) { int i; @@ -1222,7 +1309,7 @@ static void smart_init(struct nfit_test *t) | ND_INTEL_SMART_MTEMP_VALID, .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH, .media_temperature = 23 * 16, - .ctrl_temperature = 30 * 16, + .ctrl_temperature = 25 * 16, .pmic_temperature = 40 * 16, .spares = 75, .alarm_flags = ND_INTEL_SMART_SPARE_TRIP @@ -1298,17 +1385,8 @@ static int nfit_test0_alloc(struct nfit_test *t) if (!t->_fit) return -ENOMEM; - if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t->dimm_dev)) + if (nfit_test_dimm_init(t)) return -ENOMEM; - for (i = 0; i < NUM_DCR; i++) { - t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm, - &t->pdev.dev, 0, NULL, - nfit_test_dimm_attribute_groups, - "test_dimm%d", i); - if (!t->dimm_dev[i]) - return -ENOMEM; - } - smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -1340,6 +1418,8 @@ static int nfit_test1_alloc(struct nfit_test *t) if (!t->spa_set[1]) return -ENOMEM; + if (nfit_test_dimm_init(t)) + return -ENOMEM; smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -1366,7 +1446,7 @@ static void nfit_test0_setup(struct nfit_test *t) struct acpi_nfit_data_region *bdw; struct acpi_nfit_flush_address *flush; struct acpi_nfit_capabilities *pcap; - unsigned int offset, i; + unsigned int offset = 0, i; /* * spa0 (interleave first half of dimm0 and dimm1, note storage @@ -1380,93 +1460,102 @@ static void nfit_test0_setup(struct nfit_test *t) spa->range_index = 0+1; spa->address = t->spa_set_dma[0]; spa->length = SPA0_SIZE; + offset += spa->header.length; /* * spa1 (interleave last half of the 4 DIMMS, note storage * does not actually alias the related block-data-window * regions) */ - spa = nfit_buf + sizeof(*spa); + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); spa->range_index = 1+1; spa->address = t->spa_set_dma[1]; spa->length = SPA1_SIZE; + offset += spa->header.length; /* spa2 (dcr0) dimm0 */ - spa = nfit_buf + sizeof(*spa) * 2; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 2+1; spa->address = t->dcr_dma[0]; spa->length = DCR_SIZE; + offset += spa->header.length; /* spa3 (dcr1) dimm1 */ - spa = nfit_buf + sizeof(*spa) * 3; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 3+1; spa->address = t->dcr_dma[1]; spa->length = DCR_SIZE; + offset += spa->header.length; /* spa4 (dcr2) dimm2 */ - spa = nfit_buf + sizeof(*spa) * 4; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 4+1; spa->address = t->dcr_dma[2]; spa->length = DCR_SIZE; + offset += spa->header.length; /* spa5 (dcr3) dimm3 */ - spa = nfit_buf + sizeof(*spa) * 5; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 5+1; spa->address = t->dcr_dma[3]; spa->length = DCR_SIZE; + offset += spa->header.length; /* spa6 (bdw for dcr0) dimm0 */ - spa = nfit_buf + sizeof(*spa) * 6; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 6+1; spa->address = t->dimm_dma[0]; spa->length = DIMM_SIZE; + offset += spa->header.length; /* spa7 (bdw for dcr1) dimm1 */ - spa = nfit_buf + sizeof(*spa) * 7; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 7+1; spa->address = t->dimm_dma[1]; spa->length = DIMM_SIZE; + offset += spa->header.length; /* spa8 (bdw for dcr2) dimm2 */ - spa = nfit_buf + sizeof(*spa) * 8; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 8+1; spa->address = t->dimm_dma[2]; spa->length = DIMM_SIZE; + offset += spa->header.length; /* spa9 (bdw for dcr3) dimm3 */ - spa = nfit_buf + sizeof(*spa) * 9; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 9+1; spa->address = t->dimm_dma[3]; spa->length = DIMM_SIZE; + offset += spa->header.length; - offset = sizeof(*spa) * 10; /* mem-region0 (spa0, dimm0) */ memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; @@ -1481,9 +1570,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 2; + offset += memdev->header.length; /* mem-region1 (spa0, dimm1) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map); + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[1]; @@ -1497,9 +1587,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 2; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; /* mem-region2 (spa1, dimm0) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[0]; @@ -1513,9 +1604,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 4; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; /* mem-region3 (spa1, dimm1) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[1]; @@ -1528,9 +1620,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; + offset += memdev->header.length; /* mem-region4 (spa1, dimm2) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 4; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[2]; @@ -1544,9 +1637,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 4; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; /* mem-region5 (spa1, dimm3) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[3]; @@ -1559,9 +1653,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; + offset += memdev->header.length; /* mem-region6 (spa/dcr0, dimm0) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 6; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[0]; @@ -1574,9 +1669,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region7 (spa/dcr1, dimm1) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 7; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[1]; @@ -1589,9 +1685,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region8 (spa/dcr2, dimm2) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 8; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[2]; @@ -1604,9 +1701,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region9 (spa/dcr3, dimm3) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 9; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[3]; @@ -1619,9 +1717,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region10 (spa/bdw0, dimm0) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 10; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[0]; @@ -1634,9 +1733,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region11 (spa/bdw1, dimm1) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 11; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[1]; @@ -1649,9 +1749,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region12 (spa/bdw2, dimm2) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 12; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[2]; @@ -1664,9 +1765,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region13 (spa/dcr3, dimm3) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 13; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[3]; @@ -1680,12 +1782,12 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 1; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; - offset = offset + sizeof(struct acpi_nfit_memory_map) * 14; /* dcr-descriptor0: blk */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 0+1; dcr_common_init(dcr); dcr->serial_number = ~handle[0]; @@ -1696,11 +1798,12 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; /* dcr-descriptor1: blk */ - dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region); + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 1+1; dcr_common_init(dcr); dcr->serial_number = ~handle[1]; @@ -1711,11 +1814,12 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; /* dcr-descriptor2: blk */ - dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2; + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 2+1; dcr_common_init(dcr); dcr->serial_number = ~handle[2]; @@ -1726,11 +1830,12 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; /* dcr-descriptor3: blk */ - dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 3; + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 3+1; dcr_common_init(dcr); dcr->serial_number = ~handle[3]; @@ -1741,8 +1846,8 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; - offset = offset + sizeof(struct acpi_nfit_control_region) * 4; /* dcr-descriptor0: pmem */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; @@ -1753,10 +1858,10 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[0]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; /* dcr-descriptor1: pmem */ - dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, - window_size); + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); @@ -1765,10 +1870,10 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[1]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; /* dcr-descriptor2: pmem */ - dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, - window_size) * 2; + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); @@ -1777,10 +1882,10 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[2]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; /* dcr-descriptor3: pmem */ - dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, - window_size) * 3; + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); @@ -1789,54 +1894,56 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[3]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; - offset = offset + offsetof(struct acpi_nfit_control_region, - window_size) * 4; /* bdw0 (spa/dcr0, dimm0) */ bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 0+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; /* bdw1 (spa/dcr1, dimm1) */ - bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region); + bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 1+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; /* bdw2 (spa/dcr2, dimm2) */ - bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 2; + bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 2+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; /* bdw3 (spa/dcr3, dimm3) */ - bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 3; + bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 3+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; - offset = offset + sizeof(struct acpi_nfit_data_region) * 4; /* flush0 (dimm0) */ flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; @@ -1845,48 +1952,52 @@ static void nfit_test0_setup(struct nfit_test *t) flush->hint_count = NUM_HINTS; for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64); + offset += flush->header.length; /* flush1 (dimm1) */ - flush = nfit_buf + offset + flush_hint_size * 1; + flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; flush->header.length = flush_hint_size; flush->device_handle = handle[1]; flush->hint_count = NUM_HINTS; for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64); + offset += flush->header.length; /* flush2 (dimm2) */ - flush = nfit_buf + offset + flush_hint_size * 2; + flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; flush->header.length = flush_hint_size; flush->device_handle = handle[2]; flush->hint_count = NUM_HINTS; for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64); + offset += flush->header.length; /* flush3 (dimm3) */ - flush = nfit_buf + offset + flush_hint_size * 3; + flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; flush->header.length = flush_hint_size; flush->device_handle = handle[3]; flush->hint_count = NUM_HINTS; for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64); + offset += flush->header.length; /* platform capabilities */ - pcap = nfit_buf + offset + flush_hint_size * 4; + pcap = nfit_buf + offset; pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES; pcap->header.length = sizeof(*pcap); pcap->highest_capability = 1; pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH | ACPI_NFIT_CAPABILITY_MEM_FLUSH; + offset += pcap->header.length; if (t->setup_hotplug) { - offset = offset + flush_hint_size * 4 + sizeof(*pcap); /* dcr-descriptor4: blk */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 8+1; dcr_common_init(dcr); dcr->serial_number = ~handle[4]; @@ -1897,8 +2008,8 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; - offset = offset + sizeof(struct acpi_nfit_control_region); /* dcr-descriptor4: pmem */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; @@ -1909,21 +2020,20 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[4]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; - offset = offset + offsetof(struct acpi_nfit_control_region, - window_size); /* bdw4 (spa/dcr4, dimm4) */ bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 8+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; - offset = offset + sizeof(struct acpi_nfit_data_region); /* spa10 (dcr4) dimm4 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; @@ -1932,30 +2042,32 @@ static void nfit_test0_setup(struct nfit_test *t) spa->range_index = 10+1; spa->address = t->dcr_dma[4]; spa->length = DCR_SIZE; + offset += spa->header.length; /* * spa11 (single-dimm interleave for hotplug, note storage * does not actually alias the related block-data-window * regions) */ - spa = nfit_buf + offset + sizeof(*spa); + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); spa->range_index = 11+1; spa->address = t->spa_set_dma[2]; spa->length = SPA0_SIZE; + offset += spa->header.length; /* spa12 (bdw for dcr4) dimm4 */ - spa = nfit_buf + offset + sizeof(*spa) * 2; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 12+1; spa->address = t->dimm_dma[4]; spa->length = DIMM_SIZE; + offset += spa->header.length; - offset = offset + sizeof(*spa) * 3; /* mem-region14 (spa/dcr4, dimm4) */ memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; @@ -1970,10 +2082,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; - /* mem-region15 (spa0, dimm4) */ - memdev = nfit_buf + offset + - sizeof(struct acpi_nfit_memory_map); + /* mem-region15 (spa11, dimm4) */ + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[4]; @@ -1987,10 +2099,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 1; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; /* mem-region16 (spa/bdw4, dimm4) */ - memdev = nfit_buf + offset + - sizeof(struct acpi_nfit_memory_map) * 2; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[4]; @@ -2003,8 +2115,8 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; - offset = offset + sizeof(struct acpi_nfit_memory_map) * 3; /* flush3 (dimm4) */ flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; @@ -2014,8 +2126,14 @@ static void nfit_test0_setup(struct nfit_test *t) for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[4] + i * sizeof(u64); + offset += flush->header.length; + + /* sanity check to make sure we've filled the buffer */ + WARN_ON(offset != t->nfit_size); } + t->nfit_filled = offset; + post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0], SPA0_SIZE); @@ -2026,6 +2144,7 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en); set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en); set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_SMART_INJECT, &acpi_desc->dimm_cmd_force_en); set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); @@ -2061,17 +2180,18 @@ static void nfit_test1_setup(struct nfit_test *t) spa->range_index = 0+1; spa->address = t->spa_set_dma[0]; spa->length = SPA2_SIZE; + offset += spa->header.length; /* virtual cd region */ - spa = nfit_buf + sizeof(*spa); + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16); spa->range_index = 0; spa->address = t->spa_set_dma[1]; spa->length = SPA_VCD_SIZE; + offset += spa->header.length; - offset += sizeof(*spa) * 2; /* mem-region0 (spa0, dimm0) */ memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; @@ -2089,8 +2209,8 @@ static void nfit_test1_setup(struct nfit_test *t) memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED | ACPI_NFIT_MEM_NOT_ARMED; + offset += memdev->header.length; - offset += sizeof(*memdev); /* dcr-descriptor0 */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; @@ -2101,8 +2221,8 @@ static void nfit_test1_setup(struct nfit_test *t) dcr->serial_number = ~handle[5]; dcr->code = NFIT_FIC_BYTE; dcr->windows = 0; - offset += dcr->header.length; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); @@ -2117,9 +2237,9 @@ static void nfit_test1_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 1; memdev->flags = ACPI_NFIT_MEM_MAP_FAILED; + offset += memdev->header.length; /* dcr-descriptor1 */ - offset += sizeof(*memdev); dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = offsetof(struct acpi_nfit_control_region, @@ -2129,6 +2249,12 @@ static void nfit_test1_setup(struct nfit_test *t) dcr->serial_number = ~handle[6]; dcr->code = NFIT_FIC_BYTE; dcr->windows = 0; + offset += dcr->header.length; + + /* sanity check to make sure we've filled the buffer */ + WARN_ON(offset != t->nfit_size); + + t->nfit_filled = offset; post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0], SPA2_SIZE); @@ -2139,6 +2265,9 @@ static void nfit_test1_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, @@ -2487,7 +2616,7 @@ static int nfit_test_probe(struct platform_device *pdev) nd_desc->ndctl = nfit_test_ctl; rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf, - nfit_test->nfit_size); + nfit_test->nfit_filled); if (rc) return rc; diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index 428344519cdf..33752e06ff8d 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -93,6 +93,7 @@ struct nd_cmd_ars_err_inj_stat { #define ND_INTEL_FW_FINISH_UPDATE 15 #define ND_INTEL_FW_FINISH_QUERY 16 #define ND_INTEL_SMART_SET_THRESHOLD 17 +#define ND_INTEL_SMART_INJECT 18 #define ND_INTEL_SMART_HEALTH_VALID (1 << 0) #define ND_INTEL_SMART_SPARES_VALID (1 << 1) @@ -111,6 +112,10 @@ struct nd_cmd_ars_err_inj_stat { #define ND_INTEL_SMART_NON_CRITICAL_HEALTH (1 << 0) #define ND_INTEL_SMART_CRITICAL_HEALTH (1 << 1) #define ND_INTEL_SMART_FATAL_HEALTH (1 << 2) +#define ND_INTEL_SMART_INJECT_MTEMP (1 << 0) +#define ND_INTEL_SMART_INJECT_SPARE (1 << 1) +#define ND_INTEL_SMART_INJECT_FATAL (1 << 2) +#define ND_INTEL_SMART_INJECT_SHUTDOWN (1 << 3) struct nd_intel_smart { __u32 status; @@ -158,6 +163,17 @@ struct nd_intel_smart_set_threshold { __u32 status; } __packed; +struct nd_intel_smart_inject { + __u64 flags; + __u8 mtemp_enable; + __u16 media_temperature; + __u8 spare_enable; + __u8 spares; + __u8 fatal_enable; + __u8 unsafe_shutdown_enable; + __u32 status; +} __packed; + #define INTEL_FW_STORAGE_SIZE 0x100000 #define INTEL_FW_MAX_SEND_LEN 0xFFEC #define INTEL_FW_QUERY_INTERVAL 250000 diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index fa7ee369b3c9..db66f8a0d4be 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -17,7 +17,7 @@ ifeq ($(BUILD), 32) LDFLAGS += -m32 endif -targets: mapshift $(TARGETS) +targets: generated/map-shift.h $(TARGETS) main: $(OFILES) @@ -42,9 +42,7 @@ radix-tree.c: ../../../lib/radix-tree.c idr.c: ../../../lib/idr.c sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ -.PHONY: mapshift - -mapshift: +generated/map-shift.h: @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \ generated/map-shift.h; \ diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h index e3201ccf54c3..32159c08a52e 100644 --- a/tools/testing/radix-tree/linux/gfp.h +++ b/tools/testing/radix-tree/linux/gfp.h @@ -19,6 +19,7 @@ #define __GFP_RECLAIM (__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM) +#define GFP_ZONEMASK 0x0fu #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 59245b3d587c..7bf405638b0b 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -16,6 +16,7 @@ #include <linux/radix-tree.h> #include <linux/slab.h> #include <linux/errno.h> +#include <pthread.h> #include "test.h" @@ -624,6 +625,67 @@ static void multiorder_account(void) item_kill_tree(&tree); } +bool stop_iteration = false; + +static void *creator_func(void *ptr) +{ + /* 'order' is set up to ensure we have sibling entries */ + unsigned int order = RADIX_TREE_MAP_SHIFT - 1; + struct radix_tree_root *tree = ptr; + int i; + + for (i = 0; i < 10000; i++) { + item_insert_order(tree, 0, order); + item_delete_rcu(tree, 0); + } + + stop_iteration = true; + return NULL; +} + +static void *iterator_func(void *ptr) +{ + struct radix_tree_root *tree = ptr; + struct radix_tree_iter iter; + struct item *item; + void **slot; + + while (!stop_iteration) { + rcu_read_lock(); + radix_tree_for_each_slot(slot, tree, &iter, 0) { + item = radix_tree_deref_slot(slot); + + if (!item) + continue; + if (radix_tree_deref_retry(item)) { + slot = radix_tree_iter_retry(&iter); + continue; + } + + item_sanity(item, iter.index); + } + rcu_read_unlock(); + } + return NULL; +} + +static void multiorder_iteration_race(void) +{ + const int num_threads = sysconf(_SC_NPROCESSORS_ONLN); + pthread_t worker_thread[num_threads]; + RADIX_TREE(tree, GFP_KERNEL); + int i; + + pthread_create(&worker_thread[0], NULL, &creator_func, &tree); + for (i = 1; i < num_threads; i++) + pthread_create(&worker_thread[i], NULL, &iterator_func, &tree); + + for (i = 0; i < num_threads; i++) + pthread_join(worker_thread[i], NULL); + + item_kill_tree(&tree); +} + void multiorder_checks(void) { int i; @@ -644,6 +706,7 @@ void multiorder_checks(void) multiorder_join(); multiorder_split(); multiorder_account(); + multiorder_iteration_race(); radix_tree_cpu_dead(0); } diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 5978ab1f403d..def6015570b2 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -75,6 +75,25 @@ int item_delete(struct radix_tree_root *root, unsigned long index) return 0; } +static void item_free_rcu(struct rcu_head *head) +{ + struct item *item = container_of(head, struct item, rcu_head); + + free(item); +} + +int item_delete_rcu(struct radix_tree_root *root, unsigned long index) +{ + struct item *item = radix_tree_delete(root, index); + + if (item) { + item_sanity(item, index); + call_rcu(&item->rcu_head, item_free_rcu); + return 1; + } + return 0; +} + void item_check_present(struct radix_tree_root *root, unsigned long index) { struct item *item; diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index d9c031dbeb1a..31f1d9b6f506 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -5,6 +5,7 @@ #include <linux/rcupdate.h> struct item { + struct rcu_head rcu_head; unsigned long index; unsigned int order; }; @@ -12,9 +13,11 @@ struct item { struct item *item_create(unsigned long index, unsigned int order); int __item_insert(struct radix_tree_root *root, struct item *item); int item_insert(struct radix_tree_root *root, unsigned long index); +void item_sanity(struct item *item, unsigned long index); int item_insert_order(struct radix_tree_root *root, unsigned long index, unsigned order); int item_delete(struct radix_tree_root *root, unsigned long index); +int item_delete_rcu(struct radix_tree_root *root, unsigned long index); struct item *item_lookup(struct radix_tree_root *root, unsigned long index); void item_check_present(struct radix_tree_root *root, unsigned long index); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 2fc410bc4f33..32aafa92074c 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -25,6 +25,7 @@ TARGETS += mqueue TARGETS += net TARGETS += nsfs TARGETS += powerpc +TARGETS += proc TARGETS += pstore TARGETS += ptrace TARGETS += seccomp diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 9cf83f895d98..5e1ab2f0eb79 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -12,3 +12,6 @@ test_tcpbpf_user test_verifier_log feature test_libbpf_open +test_sock +test_sock_addr +urandom_read diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index faadbe233966..4123d0ab90ba 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1108,7 +1108,7 @@ static void test_stacktrace_build_id(void) assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0); - assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0); + assert(system("./urandom_read") == 0); /* disable stack trace collection */ key = 0; val = 1; @@ -1158,7 +1158,7 @@ static void test_stacktrace_build_id(void) } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); CHECK(build_id_matches < 1, "build id match", - "Didn't find expected build ID from the map"); + "Didn't find expected build ID from the map\n"); disable_pmu: ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index 73bb20cfb9b7..f4d99fabc56d 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -13,6 +13,7 @@ #include <bpf/bpf.h> #include "cgroup_helpers.h" +#include "bpf_rlimit.h" #ifndef ARRAY_SIZE # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index d488f20926e8..2950f80ba7fb 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -15,6 +15,7 @@ #include <bpf/libbpf.h> #include "cgroup_helpers.h" +#include "bpf_rlimit.h" #define CG_PATH "/foo" #define CONNECT4_PROG_PATH "./connect4_prog.o" diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh index c6e1dcf992c4..9832a875a828 100755 --- a/tools/testing/selftests/bpf/test_sock_addr.sh +++ b/tools/testing/selftests/bpf/test_sock_addr.sh @@ -4,7 +4,7 @@ set -eu ping_once() { - ping -q -c 1 -W 1 ${1%%/*} >/dev/null 2>&1 + ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1 } wait_for_ip() @@ -13,7 +13,7 @@ wait_for_ip() echo -n "Wait for testing IPv4/IPv6 to become available " for _i in $(seq ${MAX_PING_TRIES}); do echo -n "." - if ping_once ${TEST_IPv4} && ping_once ${TEST_IPv6}; then + if ping_once 4 ${TEST_IPv4} && ping_once 6 ${TEST_IPv6}; then echo " OK" return fi diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3e7718b1a9ae..fd7de7eb329e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -11713,6 +11713,11 @@ static void get_unpriv_disabled() FILE *fd; fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r"); + if (!fd) { + perror("fopen /proc/sys/"UNPRIV_SYSCTL); + unpriv_disabled = true; + return; + } if (fgets(buf, 2, fd) == buf && atoi(buf)) unpriv_disabled = true; fclose(fd); diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index 4e6d09fb166f..5c7d7001ad37 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,8 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := dnotify_test devpts_pts -all: $(TEST_PROGS) -include ../lib.mk +TEST_GEN_PROGS := devpts_pts +TEST_GEN_PROGS_EXTENDED := dnotify_test -clean: - rm -fr $(TEST_PROGS) +include ../lib.mk diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile index 826f38d5dd19..261c81f08606 100644 --- a/tools/testing/selftests/firmware/Makefile +++ b/tools/testing/selftests/firmware/Makefile @@ -4,6 +4,7 @@ all: TEST_PROGS := fw_run_tests.sh +TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_lib.sh include ../lib.mk diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh index 9ea31b57d71a..962d7f4ac627 100755 --- a/tools/testing/selftests/firmware/fw_lib.sh +++ b/tools/testing/selftests/firmware/fw_lib.sh @@ -154,11 +154,13 @@ test_finish() if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout fi - if [ "$OLD_FWPATH" = "" ]; then - OLD_FWPATH=" " - fi if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then - echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path + if [ "$OLD_FWPATH" = "" ]; then + # A zero-length write won't work; write a null byte + printf '\000' >/sys/module/firmware_class/parameters/path + else + echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path + fi fi if [ -f $FW ]; then rm -f "$FW" diff --git a/tools/testing/selftests/firmware/fw_run_tests.sh b/tools/testing/selftests/firmware/fw_run_tests.sh index 06d638e9dc62..cffdd4eb0a57 100755 --- a/tools/testing/selftests/firmware/fw_run_tests.sh +++ b/tools/testing/selftests/firmware/fw_run_tests.sh @@ -66,5 +66,5 @@ if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then run_test_config_0003 else echo "Running basic kernel configuration, working with your config" - run_test + run_tests fi diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index df3dd7fe5f9b..2a4f16fc9819 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -59,6 +59,13 @@ disable_events() { echo 0 > events/enable } +clear_synthetic_events() { # reset all current synthetic events + grep -v ^# synthetic_events | + while read line; do + echo "!$line" >> synthetic_events + done +} + initialize_ftrace() { # Reset ftrace to initial-state # As the initial state, ftrace will be set to nop tracer, # no events, no triggers, no filters, no function filters, diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc new file mode 100644 index 000000000000..2aabab363cfb --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc @@ -0,0 +1,39 @@ +#!/bin/sh +# description: event trigger - test extended error support + + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +echo "Test extended error support" +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger +! echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null +if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then + fail "Failed to generate extended error in histogram" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc new file mode 100644 index 000000000000..7fd5b4a8f060 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc @@ -0,0 +1,54 @@ +#!/bin/sh +# description: event trigger - test field variable support + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test field variable support" + +echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events +echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger +echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger +echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger + +ping localhost -c 3 +if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then + fail "Failed to create inter-event histogram" +fi + +if ! grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then + fail "Failed to create histogram with field variable" +fi + +echo '!hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger + +if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then + fail "Failed to remove histogram with field variable" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc new file mode 100644 index 000000000000..c93dbe38b5df --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc @@ -0,0 +1,58 @@ +#!/bin/sh +# description: event trigger - test inter-event combined histogram trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +reset_tracer +do_reset +clear_synthetic_events + +echo "Test create synthetic event" + +echo 'waking_latency u64 lat pid_t pid' > synthetic_events +if [ ! -d events/synthetic/waking_latency ]; then + fail "Failed to create waking_latency synthetic event" +fi + +echo "Test combined histogram" + +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger +echo 'hist:keys=pid:waking_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).waking_latency($waking_lat,pid) if comm=="ping"' > events/sched/sched_wakeup/trigger +echo 'hist:keys=pid,lat:sort=pid,lat' > events/synthetic/waking_latency/trigger + +echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events +echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger + +echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events +echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger +echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger + +ping localhost -c 3 +if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then + fail "Failed to create combined histogram" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc new file mode 100644 index 000000000000..c193dce611a2 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc @@ -0,0 +1,44 @@ +#!/bin/sh +# description: event trigger - test multiple actions on hist trigger + + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test multiple actions on hist trigger" +echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events +TRIGGER1=events/sched/sched_wakeup/trigger +TRIGGER2=events/sched/sched_switch/trigger + +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' > $TRIGGER1 +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0 if next_comm=="cyclictest"' >> $TRIGGER2 +echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid) if next_comm=="cyclictest"' >> $TRIGGER2 +echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid) if next_comm=="cyclictest"' >> $TRIGGER2 +echo 'hist:keys=next_pid if next_comm=="cyclictest"' >> $TRIGGER2 + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc new file mode 100644 index 000000000000..e84e7d048566 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc @@ -0,0 +1,50 @@ +#!/bin/sh +# description: event trigger - test inter-event histogram trigger onmatch action + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test create synthetic event" + +echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events +if [ ! -d events/synthetic/wakeup_latency ]; then + fail "Failed to create wakeup_latency synthetic event" +fi + +echo "Test create histogram for synthetic event" +echo "Test histogram variables,simple expression support and onmatch action" + +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger +echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger +ping localhost -c 5 +if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then + fail "Failed to create onmatch action inter-event histogram" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc new file mode 100644 index 000000000000..7907d8aacde3 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc @@ -0,0 +1,50 @@ +#!/bin/sh +# description: event trigger - test inter-event histogram trigger onmatch-onmax action + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test create synthetic event" + +echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events +if [ ! -d events/synthetic/wakeup_latency ]; then + fail "Failed to create wakeup_latency synthetic event" +fi + +echo "Test create histogram for synthetic event" +echo "Test histogram variables,simple expression support and onmatch-onmax action" + +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger +echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger +ping localhost -c 5 +if [ ! grep -q "ping" events/synthetic/wakeup_latency/hist -o ! grep -q "max:" events/sched/sched_switch/hist]; then + fail "Failed to create onmatch-onmax action inter-event histogram" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc new file mode 100644 index 000000000000..38b7ed6242b2 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc @@ -0,0 +1,48 @@ +#!/bin/sh +# description: event trigger - test inter-event histogram trigger onmax action + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test create synthetic event" + +echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events +if [ ! -d events/synthetic/wakeup_latency ]; then + fail "Failed to create wakeup_latency synthetic event" +fi + +echo "Test onmax action" + +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger +ping localhost -c 3 +if ! grep -q "max:" events/sched/sched_switch/hist; then + fail "Failed to create onmax action inter-event histogram" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc new file mode 100644 index 000000000000..cef11377dcbd --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc @@ -0,0 +1,54 @@ +#!/bin/sh +# description: event trigger - test synthetic event create remove +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test create synthetic event" + +echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events +if [ ! -d events/synthetic/wakeup_latency ]; then + fail "Failed to create wakeup_latency synthetic event" +fi + +reset_trigger + +echo "Test create synthetic event with an error" +echo 'wakeup_latency u64 lat pid_t pid char' > synthetic_events > /dev/null +if [ -d events/synthetic/wakeup_latency ]; then + fail "Created wakeup_latency synthetic event with an invalid format" +fi + +reset_trigger + +echo "Test remove synthetic event" +echo '!wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events +if [ -d events/synthetic/wakeup_latency ]; then + fail "Failed to delete wakeup_latency synthetic event" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index dc44de904797..d9d00319b07c 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -4,17 +4,18 @@ top_srcdir = ../../../../ UNAME_M := $(shell uname -m) LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c -LIBKVM_x86_64 = lib/x86.c +LIBKVM_x86_64 = lib/x86.c lib/vmx.c TEST_GEN_PROGS_x86_64 = set_sregs_test TEST_GEN_PROGS_x86_64 += sync_regs_test +TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ -CFLAGS += -O2 -g -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) +CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I.. # After inclusion, $(OUTPUT) is defined and # $(TEST_GEN_PROGS) starts with $(OUTPUT)/ diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 57974ad46373..637b7017b6ee 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -112,24 +112,27 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, uint32_t memslot); -void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid); +struct kvm_cpuid2 *kvm_get_supported_cpuid(void); void vcpu_set_cpuid( struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); -struct kvm_cpuid2 *allocate_kvm_cpuid2(void); struct kvm_cpuid_entry2 * -find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, - uint32_t index); +kvm_get_supported_cpuid_index(uint32_t function, uint32_t index); static inline struct kvm_cpuid_entry2 * -find_cpuid_entry(struct kvm_cpuid2 *cpuid, uint32_t function) +kvm_get_supported_cpuid_entry(uint32_t function) { - return find_cpuid_index_entry(cpuid, function, 0); + return kvm_get_supported_cpuid_index(function, 0); } struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code); void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); +typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr, + vm_paddr_t vmxon_paddr, + vm_vaddr_t vmcs_vaddr, + vm_paddr_t vmcs_paddr); + struct kvm_userspace_memory_region * kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 7ab98e41324f..ac53730b30aa 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -19,6 +19,7 @@ #include <errno.h> #include <unistd.h> #include <fcntl.h> +#include "kselftest.h" ssize_t test_write(int fd, const void *buf, size_t count); ssize_t test_read(int fd, void *buf, size_t count); diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/vmx.h new file mode 100644 index 000000000000..6ed8499807fd --- /dev/null +++ b/tools/testing/selftests/kvm/include/vmx.h @@ -0,0 +1,494 @@ +/* + * tools/testing/selftests/kvm/include/vmx.h + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + */ + +#ifndef SELFTEST_KVM_VMX_H +#define SELFTEST_KVM_VMX_H + +#include <stdint.h> +#include "x86.h" + +#define CPUID_VMX_BIT 5 + +#define CPUID_VMX (1 << 5) + +/* + * Definitions of Primary Processor-Based VM-Execution Controls. + */ +#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 +#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 +#define CPU_BASED_HLT_EXITING 0x00000080 +#define CPU_BASED_INVLPG_EXITING 0x00000200 +#define CPU_BASED_MWAIT_EXITING 0x00000400 +#define CPU_BASED_RDPMC_EXITING 0x00000800 +#define CPU_BASED_RDTSC_EXITING 0x00001000 +#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 +#define CPU_BASED_CR3_STORE_EXITING 0x00010000 +#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 +#define CPU_BASED_CR8_STORE_EXITING 0x00100000 +#define CPU_BASED_TPR_SHADOW 0x00200000 +#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 +#define CPU_BASED_MOV_DR_EXITING 0x00800000 +#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 +#define CPU_BASED_USE_IO_BITMAPS 0x02000000 +#define CPU_BASED_MONITOR_TRAP 0x08000000 +#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 +#define CPU_BASED_MONITOR_EXITING 0x20000000 +#define CPU_BASED_PAUSE_EXITING 0x40000000 +#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 + +#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 + +/* + * Definitions of Secondary Processor-Based VM-Execution Controls. + */ +#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 +#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 +#define SECONDARY_EXEC_DESC 0x00000004 +#define SECONDARY_EXEC_RDTSCP 0x00000008 +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 +#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 +#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 +#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 +#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 +#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 +#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800 +#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 +#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 +#define SECONDARY_EXEC_ENABLE_PML 0x00020000 +#define SECONDARY_EPT_VE 0x00040000 +#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000 +#define SECONDARY_EXEC_TSC_SCALING 0x02000000 + +#define PIN_BASED_EXT_INTR_MASK 0x00000001 +#define PIN_BASED_NMI_EXITING 0x00000008 +#define PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 +#define PIN_BASED_POSTED_INTR 0x00000080 + +#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 + +#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 +#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 +#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 +#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 +#define VM_EXIT_SAVE_IA32_PAT 0x00040000 +#define VM_EXIT_LOAD_IA32_PAT 0x00080000 +#define VM_EXIT_SAVE_IA32_EFER 0x00100000 +#define VM_EXIT_LOAD_IA32_EFER 0x00200000 +#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 + +#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff + +#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 +#define VM_ENTRY_IA32E_MODE 0x00000200 +#define VM_ENTRY_SMM 0x00000400 +#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 +#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 +#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 +#define VM_ENTRY_LOAD_IA32_EFER 0x00008000 + +#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff + +#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f +#define VMX_MISC_SAVE_EFER_LMA 0x00000020 + +#define EXIT_REASON_FAILED_VMENTRY 0x80000000 +#define EXIT_REASON_EXCEPTION_NMI 0 +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 +#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_NMI_WINDOW 8 +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_INVALID_STATE 33 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_INSTRUCTION 39 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 +#define EXIT_REASON_MCE_DURING_VMENTRY 41 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EOI_INDUCED 45 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_INVEPT 50 +#define EXIT_REASON_RDTSCP 51 +#define EXIT_REASON_PREEMPTION_TIMER 52 +#define EXIT_REASON_INVVPID 53 +#define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 +#define EXIT_REASON_APIC_WRITE 56 +#define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_PML_FULL 62 +#define EXIT_REASON_XSAVES 63 +#define EXIT_REASON_XRSTORS 64 +#define LAST_EXIT_REASON 64 + +enum vmcs_field { + VIRTUAL_PROCESSOR_ID = 0x00000000, + POSTED_INTR_NV = 0x00000002, + GUEST_ES_SELECTOR = 0x00000800, + GUEST_CS_SELECTOR = 0x00000802, + GUEST_SS_SELECTOR = 0x00000804, + GUEST_DS_SELECTOR = 0x00000806, + GUEST_FS_SELECTOR = 0x00000808, + GUEST_GS_SELECTOR = 0x0000080a, + GUEST_LDTR_SELECTOR = 0x0000080c, + GUEST_TR_SELECTOR = 0x0000080e, + GUEST_INTR_STATUS = 0x00000810, + GUEST_PML_INDEX = 0x00000812, + HOST_ES_SELECTOR = 0x00000c00, + HOST_CS_SELECTOR = 0x00000c02, + HOST_SS_SELECTOR = 0x00000c04, + HOST_DS_SELECTOR = 0x00000c06, + HOST_FS_SELECTOR = 0x00000c08, + HOST_GS_SELECTOR = 0x00000c0a, + HOST_TR_SELECTOR = 0x00000c0c, + IO_BITMAP_A = 0x00002000, + IO_BITMAP_A_HIGH = 0x00002001, + IO_BITMAP_B = 0x00002002, + IO_BITMAP_B_HIGH = 0x00002003, + MSR_BITMAP = 0x00002004, + MSR_BITMAP_HIGH = 0x00002005, + VM_EXIT_MSR_STORE_ADDR = 0x00002006, + VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007, + VM_EXIT_MSR_LOAD_ADDR = 0x00002008, + VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, + VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, + VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, + PML_ADDRESS = 0x0000200e, + PML_ADDRESS_HIGH = 0x0000200f, + TSC_OFFSET = 0x00002010, + TSC_OFFSET_HIGH = 0x00002011, + VIRTUAL_APIC_PAGE_ADDR = 0x00002012, + VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, + APIC_ACCESS_ADDR = 0x00002014, + APIC_ACCESS_ADDR_HIGH = 0x00002015, + POSTED_INTR_DESC_ADDR = 0x00002016, + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, + EPT_POINTER = 0x0000201a, + EPT_POINTER_HIGH = 0x0000201b, + EOI_EXIT_BITMAP0 = 0x0000201c, + EOI_EXIT_BITMAP0_HIGH = 0x0000201d, + EOI_EXIT_BITMAP1 = 0x0000201e, + EOI_EXIT_BITMAP1_HIGH = 0x0000201f, + EOI_EXIT_BITMAP2 = 0x00002020, + EOI_EXIT_BITMAP2_HIGH = 0x00002021, + EOI_EXIT_BITMAP3 = 0x00002022, + EOI_EXIT_BITMAP3_HIGH = 0x00002023, + VMREAD_BITMAP = 0x00002026, + VMREAD_BITMAP_HIGH = 0x00002027, + VMWRITE_BITMAP = 0x00002028, + VMWRITE_BITMAP_HIGH = 0x00002029, + XSS_EXIT_BITMAP = 0x0000202C, + XSS_EXIT_BITMAP_HIGH = 0x0000202D, + TSC_MULTIPLIER = 0x00002032, + TSC_MULTIPLIER_HIGH = 0x00002033, + GUEST_PHYSICAL_ADDRESS = 0x00002400, + GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, + VMCS_LINK_POINTER = 0x00002800, + VMCS_LINK_POINTER_HIGH = 0x00002801, + GUEST_IA32_DEBUGCTL = 0x00002802, + GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, + GUEST_IA32_PAT = 0x00002804, + GUEST_IA32_PAT_HIGH = 0x00002805, + GUEST_IA32_EFER = 0x00002806, + GUEST_IA32_EFER_HIGH = 0x00002807, + GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, + GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809, + GUEST_PDPTR0 = 0x0000280a, + GUEST_PDPTR0_HIGH = 0x0000280b, + GUEST_PDPTR1 = 0x0000280c, + GUEST_PDPTR1_HIGH = 0x0000280d, + GUEST_PDPTR2 = 0x0000280e, + GUEST_PDPTR2_HIGH = 0x0000280f, + GUEST_PDPTR3 = 0x00002810, + GUEST_PDPTR3_HIGH = 0x00002811, + GUEST_BNDCFGS = 0x00002812, + GUEST_BNDCFGS_HIGH = 0x00002813, + HOST_IA32_PAT = 0x00002c00, + HOST_IA32_PAT_HIGH = 0x00002c01, + HOST_IA32_EFER = 0x00002c02, + HOST_IA32_EFER_HIGH = 0x00002c03, + HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, + HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05, + PIN_BASED_VM_EXEC_CONTROL = 0x00004000, + CPU_BASED_VM_EXEC_CONTROL = 0x00004002, + EXCEPTION_BITMAP = 0x00004004, + PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, + PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, + CR3_TARGET_COUNT = 0x0000400a, + VM_EXIT_CONTROLS = 0x0000400c, + VM_EXIT_MSR_STORE_COUNT = 0x0000400e, + VM_EXIT_MSR_LOAD_COUNT = 0x00004010, + VM_ENTRY_CONTROLS = 0x00004012, + VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, + VM_ENTRY_INTR_INFO_FIELD = 0x00004016, + VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, + VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, + TPR_THRESHOLD = 0x0000401c, + SECONDARY_VM_EXEC_CONTROL = 0x0000401e, + PLE_GAP = 0x00004020, + PLE_WINDOW = 0x00004022, + VM_INSTRUCTION_ERROR = 0x00004400, + VM_EXIT_REASON = 0x00004402, + VM_EXIT_INTR_INFO = 0x00004404, + VM_EXIT_INTR_ERROR_CODE = 0x00004406, + IDT_VECTORING_INFO_FIELD = 0x00004408, + IDT_VECTORING_ERROR_CODE = 0x0000440a, + VM_EXIT_INSTRUCTION_LEN = 0x0000440c, + VMX_INSTRUCTION_INFO = 0x0000440e, + GUEST_ES_LIMIT = 0x00004800, + GUEST_CS_LIMIT = 0x00004802, + GUEST_SS_LIMIT = 0x00004804, + GUEST_DS_LIMIT = 0x00004806, + GUEST_FS_LIMIT = 0x00004808, + GUEST_GS_LIMIT = 0x0000480a, + GUEST_LDTR_LIMIT = 0x0000480c, + GUEST_TR_LIMIT = 0x0000480e, + GUEST_GDTR_LIMIT = 0x00004810, + GUEST_IDTR_LIMIT = 0x00004812, + GUEST_ES_AR_BYTES = 0x00004814, + GUEST_CS_AR_BYTES = 0x00004816, + GUEST_SS_AR_BYTES = 0x00004818, + GUEST_DS_AR_BYTES = 0x0000481a, + GUEST_FS_AR_BYTES = 0x0000481c, + GUEST_GS_AR_BYTES = 0x0000481e, + GUEST_LDTR_AR_BYTES = 0x00004820, + GUEST_TR_AR_BYTES = 0x00004822, + GUEST_INTERRUPTIBILITY_INFO = 0x00004824, + GUEST_ACTIVITY_STATE = 0X00004826, + GUEST_SYSENTER_CS = 0x0000482A, + VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, + HOST_IA32_SYSENTER_CS = 0x00004c00, + CR0_GUEST_HOST_MASK = 0x00006000, + CR4_GUEST_HOST_MASK = 0x00006002, + CR0_READ_SHADOW = 0x00006004, + CR4_READ_SHADOW = 0x00006006, + CR3_TARGET_VALUE0 = 0x00006008, + CR3_TARGET_VALUE1 = 0x0000600a, + CR3_TARGET_VALUE2 = 0x0000600c, + CR3_TARGET_VALUE3 = 0x0000600e, + EXIT_QUALIFICATION = 0x00006400, + GUEST_LINEAR_ADDRESS = 0x0000640a, + GUEST_CR0 = 0x00006800, + GUEST_CR3 = 0x00006802, + GUEST_CR4 = 0x00006804, + GUEST_ES_BASE = 0x00006806, + GUEST_CS_BASE = 0x00006808, + GUEST_SS_BASE = 0x0000680a, + GUEST_DS_BASE = 0x0000680c, + GUEST_FS_BASE = 0x0000680e, + GUEST_GS_BASE = 0x00006810, + GUEST_LDTR_BASE = 0x00006812, + GUEST_TR_BASE = 0x00006814, + GUEST_GDTR_BASE = 0x00006816, + GUEST_IDTR_BASE = 0x00006818, + GUEST_DR7 = 0x0000681a, + GUEST_RSP = 0x0000681c, + GUEST_RIP = 0x0000681e, + GUEST_RFLAGS = 0x00006820, + GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, + GUEST_SYSENTER_ESP = 0x00006824, + GUEST_SYSENTER_EIP = 0x00006826, + HOST_CR0 = 0x00006c00, + HOST_CR3 = 0x00006c02, + HOST_CR4 = 0x00006c04, + HOST_FS_BASE = 0x00006c06, + HOST_GS_BASE = 0x00006c08, + HOST_TR_BASE = 0x00006c0a, + HOST_GDTR_BASE = 0x00006c0c, + HOST_IDTR_BASE = 0x00006c0e, + HOST_IA32_SYSENTER_ESP = 0x00006c10, + HOST_IA32_SYSENTER_EIP = 0x00006c12, + HOST_RSP = 0x00006c14, + HOST_RIP = 0x00006c16, +}; + +struct vmx_msr_entry { + uint32_t index; + uint32_t reserved; + uint64_t value; +} __attribute__ ((aligned(16))); + +static inline int vmxon(uint64_t phys) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(phys) + : "cc", "memory"); + + return ret; +} + +static inline void vmxoff(void) +{ + __asm__ __volatile__("vmxoff"); +} + +static inline int vmclear(uint64_t vmcs_pa) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(vmcs_pa) + : "cc", "memory"); + + return ret; +} + +static inline int vmptrld(uint64_t vmcs_pa) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(vmcs_pa) + : "cc", "memory"); + + return ret; +} + +/* + * No guest state (e.g. GPRs) is established by this vmlaunch. + */ +static inline int vmlaunch(void) +{ + int ret; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "vmwrite %%rsp, %[host_rsp];" + "lea 1f(%%rip), %%rax;" + "vmwrite %%rax, %[host_rip];" + "vmlaunch;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r"((uint64_t)HOST_RSP), + [host_rip]"r"((uint64_t)HOST_RIP) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +/* + * No guest state (e.g. GPRs) is established by this vmresume. + */ +static inline int vmresume(void) +{ + int ret; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "vmwrite %%rsp, %[host_rsp];" + "lea 1f(%%rip), %%rax;" + "vmwrite %%rax, %[host_rip];" + "vmresume;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r"((uint64_t)HOST_RSP), + [host_rip]"r"((uint64_t)HOST_RIP) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +static inline int vmread(uint64_t encoding, uint64_t *value) +{ + uint64_t tmp; + uint8_t ret; + + __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]" + : [value]"=rm"(tmp), [ret]"=rm"(ret) + : [encoding]"r"(encoding) + : "cc", "memory"); + + *value = tmp; + return ret; +} + +/* + * A wrapper around vmread that ignores errors and returns zero if the + * vmread instruction fails. + */ +static inline uint64_t vmreadz(uint64_t encoding) +{ + uint64_t value = 0; + vmread(encoding, &value); + return value; +} + +static inline int vmwrite(uint64_t encoding, uint64_t value) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]" + : [ret]"=rm"(ret) + : [value]"rm"(value), [encoding]"r"(encoding) + : "cc", "memory"); + + return ret; +} + +static inline uint32_t vmcs_revision(void) +{ + return rdmsr(MSR_IA32_VMX_BASIC); +} + +void prepare_for_vmx_operation(void); +void prepare_vmcs(void *guest_rip, void *guest_rsp); +struct kvm_vm *vm_create_default_vmx(uint32_t vcpuid, + vmx_guest_code_t guest_code); + +#endif /* !SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 7ca1bb40c498..37e2a787d2fc 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -50,8 +50,8 @@ int kvm_check_cap(long cap) int kvm_fd; kvm_fd = open(KVM_DEV_PATH, O_RDONLY); - TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", - KVM_DEV_PATH, kvm_fd, errno); + if (kvm_fd < 0) + exit(KSFT_SKIP); ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap); TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n" @@ -91,8 +91,8 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) vm->mode = mode; kvm_fd = open(KVM_DEV_PATH, perm); - TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", - KVM_DEV_PATH, kvm_fd, errno); + if (kvm_fd < 0) + exit(KSFT_SKIP); /* Create VM. */ vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL); @@ -378,7 +378,7 @@ int kvm_memcmp_hva_gva(void *hva, * complicated. This function uses a reasonable default length for * the array and performs the appropriate allocation. */ -struct kvm_cpuid2 *allocate_kvm_cpuid2(void) +static struct kvm_cpuid2 *allocate_kvm_cpuid2(void) { struct kvm_cpuid2 *cpuid; int nent = 100; @@ -402,26 +402,31 @@ struct kvm_cpuid2 *allocate_kvm_cpuid2(void) * Input Args: None * * Output Args: - * cpuid - The supported KVM CPUID * - * Return: void + * Return: The supported KVM CPUID * * Get the guest CPUID supported by KVM. */ -void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) +struct kvm_cpuid2 *kvm_get_supported_cpuid(void) { + static struct kvm_cpuid2 *cpuid; int ret; int kvm_fd; + if (cpuid) + return cpuid; + + cpuid = allocate_kvm_cpuid2(); kvm_fd = open(KVM_DEV_PATH, O_RDONLY); - TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", - KVM_DEV_PATH, kvm_fd, errno); + if (kvm_fd < 0) + exit(KSFT_SKIP); ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n", ret, errno); close(kvm_fd); + return cpuid; } /* Locate a cpuid entry. @@ -435,12 +440,13 @@ void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) * Return: A pointer to the cpuid entry. Never returns NULL. */ struct kvm_cpuid_entry2 * -find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, - uint32_t index) +kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) { + struct kvm_cpuid2 *cpuid; struct kvm_cpuid_entry2 *entry = NULL; int i; + cpuid = kvm_get_supported_cpuid(); for (i = 0; i < cpuid->nent; i++) { if (cpuid->entries[i].function == function && cpuid->entries[i].index == index) { @@ -669,8 +675,8 @@ static int vcpu_mmap_sz(void) int dev_fd, ret; dev_fd = open(KVM_DEV_PATH, O_RDONLY); - TEST_ASSERT(dev_fd >= 0, "%s open %s failed, rc: %i errno: %i", - __func__, KVM_DEV_PATH, dev_fd, errno); + if (dev_fd < 0) + exit(KSFT_SKIP); ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); TEST_ASSERT(ret >= sizeof(struct kvm_run), @@ -1435,7 +1441,7 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, sparsebit_idx_t pg; TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " - "not divisable by page size.\n" + "not divisible by page size.\n" " paddr_min: 0x%lx page_size: 0x%x", paddr_min, vm->page_size); diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index 0c5cf3e0cb6f..b132bc95d183 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -121,7 +121,7 @@ * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. * - * + Node starting index is evenly divisable by the number of bits + * + Node starting index is evenly divisible by the number of bits * within a nodes mask member. * * + Nodes never represent a range of bits that wrap around the @@ -1741,7 +1741,7 @@ void sparsebit_validate_internal(struct sparsebit *s) /* Validate node index is divisible by the mask size */ if (nodep->idx % MASK_BITS) { - fprintf(stderr, "Node index not divisable by " + fprintf(stderr, "Node index not divisible by " "mask size,\n" " nodep: %p nodep->idx: 0x%lx " "MASK_BITS: %lu\n", diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/vmx.c new file mode 100644 index 000000000000..0231bc0aae7b --- /dev/null +++ b/tools/testing/selftests/kvm/lib/vmx.c @@ -0,0 +1,243 @@ +/* + * tools/testing/selftests/kvm/lib/x86.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include "test_util.h" +#include "kvm_util.h" +#include "x86.h" +#include "vmx.h" + +/* Create a default VM for VMX tests. + * + * Input Args: + * vcpuid - The id of the single VCPU to add to the VM. + * guest_code - The vCPU's entry point + * + * Output Args: None + * + * Return: + * Pointer to opaque structure that describes the created VM. + */ +struct kvm_vm * +vm_create_default_vmx(uint32_t vcpuid, vmx_guest_code_t guest_code) +{ + struct kvm_cpuid2 *cpuid; + struct kvm_vm *vm; + vm_vaddr_t vmxon_vaddr; + vm_paddr_t vmxon_paddr; + vm_vaddr_t vmcs_vaddr; + vm_paddr_t vmcs_paddr; + + vm = vm_create_default(vcpuid, (void *) guest_code); + + /* Enable nesting in CPUID */ + vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); + + /* Setup of a region of guest memory for the vmxon region. */ + vmxon_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0); + vmxon_paddr = addr_gva2gpa(vm, vmxon_vaddr); + + /* Setup of a region of guest memory for a vmcs. */ + vmcs_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0); + vmcs_paddr = addr_gva2gpa(vm, vmcs_vaddr); + + vcpu_args_set(vm, vcpuid, 4, vmxon_vaddr, vmxon_paddr, vmcs_vaddr, + vmcs_paddr); + + return vm; +} + +void prepare_for_vmx_operation(void) +{ + uint64_t feature_control; + uint64_t required; + unsigned long cr0; + unsigned long cr4; + + /* + * Ensure bits in CR0 and CR4 are valid in VMX operation: + * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx. + * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx. + */ + __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory"); + cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1); + cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0); + __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory"); + + __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory"); + cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1); + cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0); + /* Enable VMX operation */ + cr4 |= X86_CR4_VMXE; + __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory"); + + /* + * Configure IA32_FEATURE_CONTROL MSR to allow VMXON: + * Bit 0: Lock bit. If clear, VMXON causes a #GP. + * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON + * outside of SMX causes a #GP. + */ + required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + required |= FEATURE_CONTROL_LOCKED; + feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); + if ((feature_control & required) != required) + wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required); +} + +/* + * Initialize the control fields to the most basic settings possible. + */ +static inline void init_vmcs_control_fields(void) +{ + vmwrite(VIRTUAL_PROCESSOR_ID, 0); + vmwrite(POSTED_INTR_NV, 0); + + vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PINBASED_CTLS)); + vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PROCBASED_CTLS)); + vmwrite(EXCEPTION_BITMAP, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ + vmwrite(CR3_TARGET_COUNT, 0); + vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) | + VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */ + vmwrite(VM_EXIT_MSR_STORE_COUNT, 0); + vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) | + VM_ENTRY_IA32E_MODE); /* 64-bit guest */ + vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0); + vmwrite(TPR_THRESHOLD, 0); + vmwrite(SECONDARY_VM_EXEC_CONTROL, 0); + + vmwrite(CR0_GUEST_HOST_MASK, 0); + vmwrite(CR4_GUEST_HOST_MASK, 0); + vmwrite(CR0_READ_SHADOW, get_cr0()); + vmwrite(CR4_READ_SHADOW, get_cr4()); +} + +/* + * Initialize the host state fields based on the current host state, with + * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch + * or vmresume. + */ +static inline void init_vmcs_host_state(void) +{ + uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS); + + vmwrite(HOST_ES_SELECTOR, get_es()); + vmwrite(HOST_CS_SELECTOR, get_cs()); + vmwrite(HOST_SS_SELECTOR, get_ss()); + vmwrite(HOST_DS_SELECTOR, get_ds()); + vmwrite(HOST_FS_SELECTOR, get_fs()); + vmwrite(HOST_GS_SELECTOR, get_gs()); + vmwrite(HOST_TR_SELECTOR, get_tr()); + + if (exit_controls & VM_EXIT_LOAD_IA32_PAT) + vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT)); + if (exit_controls & VM_EXIT_LOAD_IA32_EFER) + vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER)); + if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) + vmwrite(HOST_IA32_PERF_GLOBAL_CTRL, + rdmsr(MSR_CORE_PERF_GLOBAL_CTRL)); + + vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS)); + + vmwrite(HOST_CR0, get_cr0()); + vmwrite(HOST_CR3, get_cr3()); + vmwrite(HOST_CR4, get_cr4()); + vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE)); + vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE)); + vmwrite(HOST_TR_BASE, + get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr()))); + vmwrite(HOST_GDTR_BASE, get_gdt_base()); + vmwrite(HOST_IDTR_BASE, get_idt_base()); + vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP)); + vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP)); +} + +/* + * Initialize the guest state fields essentially as a clone of + * the host state fields. Some host state fields have fixed + * values, and we set the corresponding guest state fields accordingly. + */ +static inline void init_vmcs_guest_state(void *rip, void *rsp) +{ + vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR)); + vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR)); + vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR)); + vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR)); + vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR)); + vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR)); + vmwrite(GUEST_LDTR_SELECTOR, 0); + vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR)); + vmwrite(GUEST_INTR_STATUS, 0); + vmwrite(GUEST_PML_INDEX, 0); + + vmwrite(VMCS_LINK_POINTER, -1ll); + vmwrite(GUEST_IA32_DEBUGCTL, 0); + vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT)); + vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER)); + vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL, + vmreadz(HOST_IA32_PERF_GLOBAL_CTRL)); + + vmwrite(GUEST_ES_LIMIT, -1); + vmwrite(GUEST_CS_LIMIT, -1); + vmwrite(GUEST_SS_LIMIT, -1); + vmwrite(GUEST_DS_LIMIT, -1); + vmwrite(GUEST_FS_LIMIT, -1); + vmwrite(GUEST_GS_LIMIT, -1); + vmwrite(GUEST_LDTR_LIMIT, -1); + vmwrite(GUEST_TR_LIMIT, 0x67); + vmwrite(GUEST_GDTR_LIMIT, 0xffff); + vmwrite(GUEST_IDTR_LIMIT, 0xffff); + vmwrite(GUEST_ES_AR_BYTES, + vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_CS_AR_BYTES, 0xa09b); + vmwrite(GUEST_SS_AR_BYTES, 0xc093); + vmwrite(GUEST_DS_AR_BYTES, + vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_FS_AR_BYTES, + vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_GS_AR_BYTES, + vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_LDTR_AR_BYTES, 0x10000); + vmwrite(GUEST_TR_AR_BYTES, 0x8b); + vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); + vmwrite(GUEST_ACTIVITY_STATE, 0); + vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS)); + vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0); + + vmwrite(GUEST_CR0, vmreadz(HOST_CR0)); + vmwrite(GUEST_CR3, vmreadz(HOST_CR3)); + vmwrite(GUEST_CR4, vmreadz(HOST_CR4)); + vmwrite(GUEST_ES_BASE, 0); + vmwrite(GUEST_CS_BASE, 0); + vmwrite(GUEST_SS_BASE, 0); + vmwrite(GUEST_DS_BASE, 0); + vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE)); + vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE)); + vmwrite(GUEST_LDTR_BASE, 0); + vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE)); + vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE)); + vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE)); + vmwrite(GUEST_DR7, 0x400); + vmwrite(GUEST_RSP, (uint64_t)rsp); + vmwrite(GUEST_RIP, (uint64_t)rip); + vmwrite(GUEST_RFLAGS, 2); + vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0); + vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP)); + vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP)); +} + +void prepare_vmcs(void *guest_rip, void *guest_rsp) +{ + init_vmcs_control_fields(); + init_vmcs_host_state(); + init_vmcs_guest_state(guest_rip, guest_rsp); +} diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/sync_regs_test.c index 428e9473f5e2..eae1ece3c31b 100644 --- a/tools/testing/selftests/kvm/sync_regs_test.c +++ b/tools/testing/selftests/kvm/sync_regs_test.c @@ -85,6 +85,9 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left, { } +#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS) +#define INVALID_SYNC_FIELD 0x80000000 + int main(int argc, char *argv[]) { struct kvm_vm *vm; @@ -98,9 +101,14 @@ int main(int argc, char *argv[]) setbuf(stdout, NULL); cap = kvm_check_cap(KVM_CAP_SYNC_REGS); - TEST_ASSERT((unsigned long)cap == KVM_SYNC_X86_VALID_FIELDS, - "KVM_CAP_SYNC_REGS (0x%x) != KVM_SYNC_X86_VALID_FIELDS (0x%lx)\n", - cap, KVM_SYNC_X86_VALID_FIELDS); + if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) { + fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n"); + exit(KSFT_SKIP); + } + if ((cap & INVALID_SYNC_FIELD) != 0) { + fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n"); + exit(KSFT_SKIP); + } /* Create VM */ vm = vm_create_default(VCPU_ID, guest_code); @@ -108,7 +116,14 @@ int main(int argc, char *argv[]) run = vcpu_state(vm, VCPU_ID); /* Request reading invalid register set from VCPU. */ - run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS << 1; + run->kvm_valid_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; + + run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(rv < 0 && errno == EINVAL, "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", @@ -116,7 +131,14 @@ int main(int argc, char *argv[]) vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; /* Request setting invalid register set into VCPU. */ - run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS << 1; + run->kvm_dirty_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; + + run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(rv < 0 && errno == EINVAL, "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", @@ -125,7 +147,7 @@ int main(int argc, char *argv[]) /* Request and verify all valid register sets. */ /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */ - run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_valid_regs = TEST_SYNC_FIELDS; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", @@ -146,7 +168,7 @@ int main(int argc, char *argv[]) run->s.regs.sregs.apic_base = 1 << 11; /* TODO run->s.regs.events.XYZ = ABC; */ - run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, @@ -172,7 +194,7 @@ int main(int argc, char *argv[]) /* Clear kvm_dirty_regs bits, verify new s.regs values are * overwritten with existing guest values. */ - run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = 0; run->s.regs.regs.r11 = 0xDEADBEEF; rv = _vcpu_run(vm, VCPU_ID); @@ -211,7 +233,7 @@ int main(int argc, char *argv[]) * with kvm_sync_regs values. */ run->kvm_valid_regs = 0; - run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_dirty_regs = TEST_SYNC_FIELDS; run->s.regs.regs.r11 = 0xBBBB; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c new file mode 100644 index 000000000000..aaa633263b2c --- /dev/null +++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c @@ -0,0 +1,231 @@ +/* + * gtests/tests/vmx_tsc_adjust_test.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * + * IA32_TSC_ADJUST test + * + * According to the SDM, "if an execution of WRMSR to the + * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC, + * the logical processor also adds (or subtracts) value X from the + * IA32_TSC_ADJUST MSR. + * + * Note that when L1 doesn't intercept writes to IA32_TSC, a + * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC + * value. + * + * This test verifies that this unusual case is handled correctly. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "x86.h" +#include "vmx.h" + +#include <string.h> +#include <sys/ioctl.h> + +#ifndef MSR_IA32_TSC_ADJUST +#define MSR_IA32_TSC_ADJUST 0x3b +#endif + +#define PAGE_SIZE 4096 +#define VCPU_ID 5 + +#define TSC_ADJUST_VALUE (1ll << 32) +#define TSC_OFFSET_VALUE -(1ll << 48) + +enum { + PORT_ABORT = 0x1000, + PORT_REPORT, + PORT_DONE, +}; + +struct vmx_page { + vm_vaddr_t virt; + vm_paddr_t phys; +}; + +enum { + VMXON_PAGE = 0, + VMCS_PAGE, + MSR_BITMAP_PAGE, + + NUM_VMX_PAGES, +}; + +struct kvm_single_msr { + struct kvm_msrs header; + struct kvm_msr_entry entry; +} __attribute__((packed)); + +/* The virtual machine object. */ +static struct kvm_vm *vm; + +/* Array of vmx_page descriptors that is shared with the guest. */ +struct vmx_page *vmx_pages; + +#define exit_to_l0(_port, _arg) do_exit_to_l0(_port, (unsigned long) (_arg)) +static void do_exit_to_l0(uint16_t port, unsigned long arg) +{ + __asm__ __volatile__("in %[port], %%al" + : + : [port]"d"(port), "D"(arg) + : "rax"); +} + + +#define GUEST_ASSERT(_condition) do { \ + if (!(_condition)) \ + exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition); \ +} while (0) + +static void check_ia32_tsc_adjust(int64_t max) +{ + int64_t adjust; + + adjust = rdmsr(MSR_IA32_TSC_ADJUST); + exit_to_l0(PORT_REPORT, adjust); + GUEST_ASSERT(adjust <= max); +} + +static void l2_guest_code(void) +{ + uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE; + + wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE); + check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); + + /* Exit to L1 */ + __asm__ __volatile__("vmcall"); +} + +static void l1_guest_code(struct vmx_page *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint32_t control; + uintptr_t save_cr3; + + GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); + wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); + check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); + + prepare_for_vmx_operation(); + + /* Enter VMX root operation. */ + *(uint32_t *)vmx_pages[VMXON_PAGE].virt = vmcs_revision(); + GUEST_ASSERT(!vmxon(vmx_pages[VMXON_PAGE].phys)); + + /* Load a VMCS. */ + *(uint32_t *)vmx_pages[VMCS_PAGE].virt = vmcs_revision(); + GUEST_ASSERT(!vmclear(vmx_pages[VMCS_PAGE].phys)); + GUEST_ASSERT(!vmptrld(vmx_pages[VMCS_PAGE].phys)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + vmwrite(MSR_BITMAP, vmx_pages[MSR_BITMAP_PAGE].phys); + vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); + + /* Jump into L2. First, test failure to load guest CR3. */ + save_cr3 = vmreadz(GUEST_CR3); + vmwrite(GUEST_CR3, -1ull); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == + (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); + check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); + vmwrite(GUEST_CR3, save_cr3); + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); + + exit_to_l0(PORT_DONE, 0); +} + +static void allocate_vmx_page(struct vmx_page *page) +{ + vm_vaddr_t virt; + + virt = vm_vaddr_alloc(vm, PAGE_SIZE, 0, 0, 0); + memset(addr_gva2hva(vm, virt), 0, PAGE_SIZE); + + page->virt = virt; + page->phys = addr_gva2gpa(vm, virt); +} + +static vm_vaddr_t allocate_vmx_pages(void) +{ + vm_vaddr_t vmx_pages_vaddr; + int i; + + vmx_pages_vaddr = vm_vaddr_alloc( + vm, sizeof(struct vmx_page) * NUM_VMX_PAGES, 0, 0, 0); + + vmx_pages = (void *) addr_gva2hva(vm, vmx_pages_vaddr); + + for (i = 0; i < NUM_VMX_PAGES; i++) + allocate_vmx_page(&vmx_pages[i]); + + return vmx_pages_vaddr; +} + +void report(int64_t val) +{ + printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n", + val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_vaddr; + struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + + if (!(entry->ecx & CPUID_VMX)) { + fprintf(stderr, "nested VMX not enabled, skipping test\n"); + exit(KSFT_SKIP); + } + + vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code); + + /* Allocate VMX pages and shared descriptors (vmx_pages). */ + vmx_pages_vaddr = allocate_vmx_pages(); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_vaddr); + + for (;;) { + volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_regs regs; + + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + vcpu_regs_get(vm, VCPU_ID, ®s); + + switch (run->io.port) { + case PORT_ABORT: + TEST_ASSERT(false, "%s", (const char *) regs.rdi); + /* NOT REACHED */ + case PORT_REPORT: + report(regs.rdi); + break; + case PORT_DONE: + goto done; + default: + TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port); + } + } + + kvm_vm_free(vm); +done: + return 0; +} diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 195e9d4739a9..c1b1a4dc6a96 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -20,10 +20,10 @@ all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) .ONESHELL: define RUN_TESTS - @export KSFT_TAP_LEVEL=`echo 1`; - @test_num=`echo 0`; - @echo "TAP version 13"; - @for TEST in $(1); do \ + @export KSFT_TAP_LEVEL=`echo 1`; \ + test_num=`echo 0`; \ + echo "TAP version 13"; \ + for TEST in $(1); do \ BASENAME_TEST=`basename $$TEST`; \ test_num=`echo $$test_num+1 | bc`; \ echo "selftests: $$BASENAME_TEST"; \ diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 785fc18a16b4..3ff81a478dbe 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -6,6 +6,7 @@ CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh +TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore new file mode 100644 index 000000000000..6c16f77c722c --- /dev/null +++ b/tools/testing/selftests/proc/.gitignore @@ -0,0 +1,8 @@ +/proc-loadavg-001 +/proc-self-map-files-001 +/proc-self-map-files-002 +/proc-self-syscall +/proc-self-wchan +/proc-uptime-001 +/proc-uptime-002 +/read diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile new file mode 100644 index 000000000000..dbb87e56264c --- /dev/null +++ b/tools/testing/selftests/proc/Makefile @@ -0,0 +1,13 @@ +CFLAGS += -Wall -O2 + +TEST_GEN_PROGS := +TEST_GEN_PROGS += proc-loadavg-001 +TEST_GEN_PROGS += proc-self-map-files-001 +TEST_GEN_PROGS += proc-self-map-files-002 +TEST_GEN_PROGS += proc-self-syscall +TEST_GEN_PROGS += proc-self-wchan +TEST_GEN_PROGS += proc-uptime-001 +TEST_GEN_PROGS += proc-uptime-002 +TEST_GEN_PROGS += read + +include ../lib.mk diff --git a/tools/testing/selftests/proc/config b/tools/testing/selftests/proc/config new file mode 100644 index 000000000000..68fbd2b35884 --- /dev/null +++ b/tools/testing/selftests/proc/config @@ -0,0 +1 @@ +CONFIG_PROC_FS=y diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c new file mode 100644 index 000000000000..fcff7047000d --- /dev/null +++ b/tools/testing/selftests/proc/proc-loadavg-001.c @@ -0,0 +1,83 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test that /proc/loadavg correctly reports last pid in pid namespace. */ +#define _GNU_SOURCE +#include <errno.h> +#include <sched.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/wait.h> + +int main(void) +{ + pid_t pid; + int wstatus; + + if (unshare(CLONE_NEWPID) == -1) { + if (errno == ENOSYS || errno == EPERM) + return 2; + return 1; + } + + pid = fork(); + if (pid == -1) + return 1; + if (pid == 0) { + char buf[128], *p; + int fd; + ssize_t rv; + + fd = open("/proc/loadavg" , O_RDONLY); + if (fd == -1) + return 1; + rv = read(fd, buf, sizeof(buf)); + if (rv < 3) + return 1; + p = buf + rv; + + /* pid 1 */ + if (!(p[-3] == ' ' && p[-2] == '1' && p[-1] == '\n')) + return 1; + + pid = fork(); + if (pid == -1) + return 1; + if (pid == 0) + return 0; + if (waitpid(pid, NULL, 0) == -1) + return 1; + + lseek(fd, 0, SEEK_SET); + rv = read(fd, buf, sizeof(buf)); + if (rv < 3) + return 1; + p = buf + rv; + + /* pid 2 */ + if (!(p[-3] == ' ' && p[-2] == '2' && p[-1] == '\n')) + return 1; + + return 0; + } + + if (waitpid(pid, &wstatus, 0) == -1) + return 1; + if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) + return 0; + return 1; +} diff --git a/tools/testing/selftests/proc/proc-self-map-files-001.c b/tools/testing/selftests/proc/proc-self-map-files-001.c new file mode 100644 index 000000000000..4209c64283d6 --- /dev/null +++ b/tools/testing/selftests/proc/proc-self-map-files-001.c @@ -0,0 +1,82 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test readlink /proc/self/map_files/... */ +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/mman.h> +#include <stdlib.h> + +static void pass(const char *fmt, unsigned long a, unsigned long b) +{ + char name[64]; + char buf[64]; + + snprintf(name, sizeof(name), fmt, a, b); + if (readlink(name, buf, sizeof(buf)) == -1) + exit(1); +} + +static void fail(const char *fmt, unsigned long a, unsigned long b) +{ + char name[64]; + char buf[64]; + + snprintf(name, sizeof(name), fmt, a, b); + if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT) + return; + exit(1); +} + +int main(void) +{ + const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE); + void *p; + int fd; + unsigned long a, b; + + fd = open("/dev/zero", O_RDONLY); + if (fd == -1) + return 1; + + p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE, fd, 0); + if (p == MAP_FAILED) + return 1; + + a = (unsigned long)p; + b = (unsigned long)p + PAGE_SIZE; + + pass("/proc/self/map_files/%lx-%lx", a, b); + fail("/proc/self/map_files/ %lx-%lx", a, b); + fail("/proc/self/map_files/%lx -%lx", a, b); + fail("/proc/self/map_files/%lx- %lx", a, b); + fail("/proc/self/map_files/%lx-%lx ", a, b); + fail("/proc/self/map_files/0%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-0%lx", a, b); + if (sizeof(long) == 4) { + fail("/proc/self/map_files/100000000%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-100000000%lx", a, b); + } else if (sizeof(long) == 8) { + fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b); + } else + return 1; + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c new file mode 100644 index 000000000000..6f1f4a6e1ecb --- /dev/null +++ b/tools/testing/selftests/proc/proc-self-map-files-002.c @@ -0,0 +1,85 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test readlink /proc/self/map_files/... with address 0. */ +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/mman.h> +#include <stdlib.h> + +static void pass(const char *fmt, unsigned long a, unsigned long b) +{ + char name[64]; + char buf[64]; + + snprintf(name, sizeof(name), fmt, a, b); + if (readlink(name, buf, sizeof(buf)) == -1) + exit(1); +} + +static void fail(const char *fmt, unsigned long a, unsigned long b) +{ + char name[64]; + char buf[64]; + + snprintf(name, sizeof(name), fmt, a, b); + if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT) + return; + exit(1); +} + +int main(void) +{ + const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE); + void *p; + int fd; + unsigned long a, b; + + fd = open("/dev/zero", O_RDONLY); + if (fd == -1) + return 1; + + p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0); + if (p == MAP_FAILED) { + if (errno == EPERM) + return 2; + return 1; + } + + a = (unsigned long)p; + b = (unsigned long)p + PAGE_SIZE; + + pass("/proc/self/map_files/%lx-%lx", a, b); + fail("/proc/self/map_files/ %lx-%lx", a, b); + fail("/proc/self/map_files/%lx -%lx", a, b); + fail("/proc/self/map_files/%lx- %lx", a, b); + fail("/proc/self/map_files/%lx-%lx ", a, b); + fail("/proc/self/map_files/0%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-0%lx", a, b); + if (sizeof(long) == 4) { + fail("/proc/self/map_files/100000000%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-100000000%lx", a, b); + } else if (sizeof(long) == 8) { + fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b); + } else + return 1; + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c new file mode 100644 index 000000000000..5ab5f4810e43 --- /dev/null +++ b/tools/testing/selftests/proc/proc-self-syscall.c @@ -0,0 +1,60 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> +#include <stdio.h> + +static inline ssize_t sys_read(int fd, void *buf, size_t len) +{ + return syscall(SYS_read, fd, buf, len); +} + +int main(void) +{ + char buf1[64]; + char buf2[64]; + int fd; + ssize_t rv; + + fd = open("/proc/self/syscall", O_RDONLY); + if (fd == -1) { + if (errno == ENOENT) + return 2; + return 1; + } + + /* Do direct system call as libc can wrap anything. */ + snprintf(buf1, sizeof(buf1), "%ld 0x%lx 0x%lx 0x%lx", + (long)SYS_read, (long)fd, (long)buf2, (long)sizeof(buf2)); + + memset(buf2, 0, sizeof(buf2)); + rv = sys_read(fd, buf2, sizeof(buf2)); + if (rv < 0) + return 1; + if (rv < strlen(buf1)) + return 1; + if (strncmp(buf1, buf2, strlen(buf1)) != 0) + return 1; + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-self-wchan.c b/tools/testing/selftests/proc/proc-self-wchan.c new file mode 100644 index 000000000000..a38b2fbaa7ad --- /dev/null +++ b/tools/testing/selftests/proc/proc-self-wchan.c @@ -0,0 +1,40 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> + +int main(void) +{ + char buf[64]; + int fd; + + fd = open("/proc/self/wchan", O_RDONLY); + if (fd == -1) { + if (errno == ENOENT) + return 2; + return 1; + } + + buf[0] = '\0'; + if (read(fd, buf, sizeof(buf)) != 1) + return 1; + if (buf[0] != '0') + return 1; + return 0; +} diff --git a/tools/testing/selftests/proc/proc-uptime-001.c b/tools/testing/selftests/proc/proc-uptime-001.c new file mode 100644 index 000000000000..781f7a50fc3f --- /dev/null +++ b/tools/testing/selftests/proc/proc-uptime-001.c @@ -0,0 +1,45 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +// Test that values in /proc/uptime increment monotonically. +#undef NDEBUG +#include <assert.h> +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "proc-uptime.h" + +int main(void) +{ + uint64_t start, u0, u1, i0, i1; + int fd; + + fd = open("/proc/uptime", O_RDONLY); + assert(fd >= 0); + + proc_uptime(fd, &u0, &i0); + start = u0; + do { + proc_uptime(fd, &u1, &i1); + assert(u1 >= u0); + assert(i1 >= i0); + u0 = u1; + i0 = i1; + } while (u1 - start < 100); + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c new file mode 100644 index 000000000000..30e2b7849089 --- /dev/null +++ b/tools/testing/selftests/proc/proc-uptime-002.c @@ -0,0 +1,79 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +// Test that values in /proc/uptime increment monotonically +// while shifting across CPUs. +#define _GNU_SOURCE +#undef NDEBUG +#include <assert.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <stdlib.h> +#include <string.h> + +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "proc-uptime.h" + +static inline int sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long *m) +{ + return syscall(SYS_sched_getaffinity, pid, len, m); +} + +static inline int sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long *m) +{ + return syscall(SYS_sched_setaffinity, pid, len, m); +} + +int main(void) +{ + unsigned int len; + unsigned long *m; + unsigned int cpu; + uint64_t u0, u1, i0, i1; + int fd; + + /* find out "nr_cpu_ids" */ + m = NULL; + len = 0; + do { + len += sizeof(unsigned long); + free(m); + m = malloc(len); + } while (sys_sched_getaffinity(0, len, m) == -EINVAL); + + fd = open("/proc/uptime", O_RDONLY); + assert(fd >= 0); + + proc_uptime(fd, &u0, &i0); + for (cpu = 0; cpu < len * 8; cpu++) { + memset(m, 0, len); + m[cpu / (8 * sizeof(unsigned long))] |= 1UL << (cpu % (8 * sizeof(unsigned long))); + + /* CPU might not exist, ignore error */ + sys_sched_setaffinity(0, len, m); + + proc_uptime(fd, &u1, &i1); + assert(u1 >= u0); + assert(i1 >= i0); + u0 = u1; + i0 = i1; + } + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h new file mode 100644 index 000000000000..0e464b50e9d9 --- /dev/null +++ b/tools/testing/selftests/proc/proc-uptime.h @@ -0,0 +1,74 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> + +static unsigned long long xstrtoull(const char *p, char **end) +{ + if (*p == '0') { + *end = (char *)p + 1; + return 0; + } else if ('1' <= *p && *p <= '9') { + unsigned long long val; + + errno = 0; + val = strtoull(p, end, 10); + assert(errno == 0); + return val; + } else + assert(0); +} + +static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle) +{ + uint64_t val1, val2; + char buf[64], *p; + ssize_t rv; + + /* save "p < end" checks */ + memset(buf, 0, sizeof(buf)); + rv = pread(fd, buf, sizeof(buf), 0); + assert(0 <= rv && rv <= sizeof(buf)); + buf[sizeof(buf) - 1] = '\0'; + + p = buf; + + val1 = xstrtoull(p, &p); + assert(p[0] == '.'); + assert('0' <= p[1] && p[1] <= '9'); + assert('0' <= p[2] && p[2] <= '9'); + assert(p[3] == ' '); + + val2 = (p[1] - '0') * 10 + p[2] - '0'; + *uptime = val1 * 100 + val2; + + p += 4; + + val1 = xstrtoull(p, &p); + assert(p[0] == '.'); + assert('0' <= p[1] && p[1] <= '9'); + assert('0' <= p[2] && p[2] <= '9'); + assert(p[3] == '\n'); + + val2 = (p[1] - '0') * 10 + p[2] - '0'; + *idle = val1 * 100 + val2; + + assert(p + 4 == buf + rv); +} diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c new file mode 100644 index 000000000000..1e73c2232097 --- /dev/null +++ b/tools/testing/selftests/proc/read.c @@ -0,0 +1,147 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +// Test +// 1) read of every file in /proc +// 2) readlink of every symlink in /proc +// 3) recursively (1) + (2) for every directory in /proc +// 4) write to /proc/*/clear_refs and /proc/*/task/*/clear_refs +// 5) write to /proc/sysrq-trigger +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <sys/types.h> +#include <dirent.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +static inline bool streq(const char *s1, const char *s2) +{ + return strcmp(s1, s2) == 0; +} + +static struct dirent *xreaddir(DIR *d) +{ + struct dirent *de; + + errno = 0; + de = readdir(d); + if (!de && errno != 0) { + exit(1); + } + return de; +} + +static void f_reg(DIR *d, const char *filename) +{ + char buf[4096]; + int fd; + ssize_t rv; + + /* read from /proc/kmsg can block */ + fd = openat(dirfd(d), filename, O_RDONLY|O_NONBLOCK); + if (fd == -1) + return; + rv = read(fd, buf, sizeof(buf)); + assert((0 <= rv && rv <= sizeof(buf)) || rv == -1); + close(fd); +} + +static void f_reg_write(DIR *d, const char *filename, const char *buf, size_t len) +{ + int fd; + ssize_t rv; + + fd = openat(dirfd(d), filename, O_WRONLY); + if (fd == -1) + return; + rv = write(fd, buf, len); + assert((0 <= rv && rv <= len) || rv == -1); + close(fd); +} + +static void f_lnk(DIR *d, const char *filename) +{ + char buf[4096]; + ssize_t rv; + + rv = readlinkat(dirfd(d), filename, buf, sizeof(buf)); + assert((0 <= rv && rv <= sizeof(buf)) || rv == -1); +} + +static void f(DIR *d, unsigned int level) +{ + struct dirent *de; + + de = xreaddir(d); + assert(de->d_type == DT_DIR); + assert(streq(de->d_name, ".")); + + de = xreaddir(d); + assert(de->d_type == DT_DIR); + assert(streq(de->d_name, "..")); + + while ((de = xreaddir(d))) { + assert(!streq(de->d_name, ".")); + assert(!streq(de->d_name, "..")); + + switch (de->d_type) { + DIR *dd; + int fd; + + case DT_REG: + if (level == 0 && streq(de->d_name, "sysrq-trigger")) { + f_reg_write(d, de->d_name, "h", 1); + } else if (level == 1 && streq(de->d_name, "clear_refs")) { + f_reg_write(d, de->d_name, "1", 1); + } else if (level == 3 && streq(de->d_name, "clear_refs")) { + f_reg_write(d, de->d_name, "1", 1); + } else { + f_reg(d, de->d_name); + } + break; + case DT_DIR: + fd = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY); + if (fd == -1) + continue; + dd = fdopendir(fd); + if (!dd) + continue; + f(dd, level + 1); + closedir(dd); + break; + case DT_LNK: + f_lnk(d, de->d_name); + break; + default: + assert(0); + } + } +} + +int main(void) +{ + DIR *d; + + d = opendir("/proc"); + if (!d) + return 2; + f(d, 0); + return 0; +} diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 168c66d74fc5..e1473234968d 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -134,11 +134,15 @@ struct seccomp_data { #endif #ifndef SECCOMP_FILTER_FLAG_TSYNC -#define SECCOMP_FILTER_FLAG_TSYNC 1 +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) #endif #ifndef SECCOMP_FILTER_FLAG_LOG -#define SECCOMP_FILTER_FLAG_LOG 2 +#define SECCOMP_FILTER_FLAG_LOG (1UL << 1) +#endif + +#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) #endif #ifndef PTRACE_SECCOMP_GET_METADATA @@ -2072,14 +2076,26 @@ TEST(seccomp_syscall_mode_lock) TEST(detect_seccomp_filter_flags) { unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, - SECCOMP_FILTER_FLAG_LOG }; + SECCOMP_FILTER_FLAG_LOG, + SECCOMP_FILTER_FLAG_SPEC_ALLOW }; unsigned int flag, all_flags; int i; long ret; /* Test detection of known-good filter flags */ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { + int bits = 0; + flag = flags[i]; + /* Make sure the flag is a single bit! */ + while (flag) { + if (flag & 0x1) + bits ++; + flag >>= 1; + } + ASSERT_EQ(1, bits); + flag = flags[i]; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); ASSERT_NE(ENOSYS, errno) { TH_LOG("Kernel does not support seccomp syscall!"); diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 5b012f4981d4..6f289a49e5ec 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -66,7 +66,7 @@ "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667", "expExitCode": "0", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c default-action pipe.*index 667 ref", + "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c( jited)? default-action pipe.*index 667 ref", "matchCount": "1", "teardown": [ "$TC action flush action bpf", @@ -92,10 +92,15 @@ "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667", "expExitCode": "255", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9].*index 667 ref", + "matchPattern": "action order [0-9]*: bpf _c.o:\\[action\\] id [0-9].*index 667 ref", "matchCount": "0", "teardown": [ - "$TC action flush action bpf", + [ + "$TC action flush action bpf", + 0, + 1, + 255 + ], "rm -f _c.o" ] }, diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index d744991c0f4f..39f66bc29b82 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -11,7 +11,7 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ - protection_keys test_vdso test_vsyscall + protection_keys test_vdso test_vsyscall mov_ss_trap TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c new file mode 100644 index 000000000000..3c3a022654f3 --- /dev/null +++ b/tools/testing/selftests/x86/mov_ss_trap.c @@ -0,0 +1,285 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS + * + * This does MOV SS from a watchpointed address followed by various + * types of kernel entries. A MOV SS that hits a watchpoint will queue + * up a #DB trap but will not actually deliver that trap. The trap + * will be delivered after the next instruction instead. The CPU's logic + * seems to be: + * + * - Any fault: drop the pending #DB trap. + * - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then + * deliver #DB. + * - ICEBP: enter the kernel but do not deliver the watchpoint trap + * - breakpoint: only one #DB is delivered (phew!) + * + * There are plenty of ways for a kernel to handle this incorrectly. This + * test tries to exercise all the cases. + * + * This should mostly cover CVE-2018-1087 and CVE-2018-8897. + */ +#define _GNU_SOURCE + +#include <stdlib.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/user.h> +#include <sys/syscall.h> +#include <unistd.h> +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <err.h> +#include <string.h> +#include <setjmp.h> +#include <sys/prctl.h> + +#define X86_EFLAGS_RF (1UL << 16) + +#if __x86_64__ +# define REG_IP REG_RIP +#else +# define REG_IP REG_EIP +#endif + +unsigned short ss; +extern unsigned char breakpoint_insn[]; +sigjmp_buf jmpbuf; +static unsigned char altstack_data[SIGSTKSZ]; + +static void enable_watchpoint(void) +{ + pid_t parent = getpid(); + int status; + + pid_t child = fork(); + if (child < 0) + err(1, "fork"); + + if (child) { + if (waitpid(child, &status, 0) != child) + err(1, "waitpid for child"); + } else { + unsigned long dr0, dr1, dr7; + + dr0 = (unsigned long)&ss; + dr1 = (unsigned long)breakpoint_insn; + dr7 = ((1UL << 1) | /* G0 */ + (3UL << 16) | /* RW0 = read or write */ + (1UL << 18) | /* LEN0 = 2 bytes */ + (1UL << 3)); /* G1, RW1 = insn */ + + if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0) + err(1, "PTRACE_ATTACH"); + + if (waitpid(parent, &status, 0) != parent) + err(1, "waitpid for child"); + + if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0) + err(1, "PTRACE_POKEUSER DR0"); + + if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0) + err(1, "PTRACE_POKEUSER DR1"); + + if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0) + err(1, "PTRACE_POKEUSER DR7"); + + printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7); + + if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0) + err(1, "PTRACE_DETACH"); + + exit(0); + } +} + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static char const * const signames[] = { + [SIGSEGV] = "SIGSEGV", + [SIGBUS] = "SIBGUS", + [SIGTRAP] = "SIGTRAP", + [SIGILL] = "SIGILL", +}; + +static void sigtrap(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = ctx_void; + + printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n", + (unsigned long)ctx->uc_mcontext.gregs[REG_IP], + !!(ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF)); +} + +static void handle_and_return(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = ctx_void; + + printf("\tGot %s with RIP=%lx\n", signames[sig], + (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); +} + +static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = ctx_void; + + printf("\tGot %s with RIP=%lx\n", signames[sig], + (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); + + siglongjmp(jmpbuf, 1); +} + +int main() +{ + unsigned long nr; + + asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss)); + printf("\tSS = 0x%hx, &SS = 0x%p\n", ss, &ss); + + if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0) + printf("\tPR_SET_PTRACER_ANY succeeded\n"); + + printf("\tSet up a watchpoint\n"); + sethandler(SIGTRAP, sigtrap, 0); + enable_watchpoint(); + + printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n"); + asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; INT3\n"); + asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; INT 3\n"); + asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; CS CS INT3\n"); + asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; CSx14 INT3\n"); + asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; INT 4\n"); + sethandler(SIGSEGV, handle_and_return, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss)); + +#ifdef __i386__ + printf("[RUN]\tMOV SS; INTO\n"); + sethandler(SIGSEGV, handle_and_return, SA_RESETHAND); + nr = -1; + asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into" + : [tmp] "+r" (nr) : [ss] "m" (ss)); +#endif + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; ICEBP\n"); + + /* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */ + sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND); + + asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss)); + } + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; CLI\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss)); + } + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; #PF\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]" + : [tmp] "=r" (nr) : [ss] "m" (ss)); + } + + /* + * INT $1: if #DB has DPL=3 and there isn't special handling, + * then the kernel will die. + */ + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; INT 1\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss)); + } + +#ifdef __x86_64__ + /* + * In principle, we should test 32-bit SYSCALL as well, but + * the calling convention is so unpredictable that it's + * not obviously worth the effort. + */ + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; SYSCALL\n"); + sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND); + nr = SYS_getpid; + /* + * Toggle the high bit of RSP to make it noncanonical to + * strengthen this test on non-SMAP systems. + */ + asm volatile ("btc $63, %%rsp\n\t" + "mov %[ss], %%ss; syscall\n\t" + "btc $63, %%rsp" + : "+a" (nr) : [ss] "m" (ss) + : "rcx" +#ifdef __x86_64__ + , "r11" +#endif + ); + } +#endif + + printf("[RUN]\tMOV SS; breakpointed NOP\n"); + asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss)); + + /* + * Invoking SYSENTER directly breaks all the rules. Just handle + * the SIGSEGV. + */ + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; SYSENTER\n"); + stack_t stack = { + .ss_sp = altstack_data, + .ss_size = SIGSTKSZ, + }; + if (sigaltstack(&stack, NULL) != 0) + err(1, "sigaltstack"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK); + nr = SYS_getpid; + asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr) + : [ss] "m" (ss) : "flags", "rcx" +#ifdef __x86_64__ + , "r11" +#endif + ); + + /* We're unreachable here. SYSENTER forgets RIP. */ + } + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; INT $0x80\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + nr = 20; /* compat getpid */ + asm volatile ("mov %[ss], %%ss; int $0x80" + : "+a" (nr) : [ss] "m" (ss) + : "flags" +#ifdef __x86_64__ + , "r8", "r9", "r10", "r11" +#endif + ); + } + + printf("[OK]\tI aten't dead\n"); + return 0; +} diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index 9c0325e1ea68..50f7e9272481 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -368,6 +368,11 @@ static int expected_bnd_index = -1; uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */ unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS]; +/* Failed address bound checks: */ +#ifndef SEGV_BNDERR +# define SEGV_BNDERR 3 +#endif + /* * The kernel is supposed to provide some information about the bounds * exception in the siginfo. It should match what we have in the bounds @@ -419,8 +424,6 @@ void handler(int signum, siginfo_t *si, void *vucontext) br_count++; dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count); -#define SEGV_BNDERR 3 /* failed address bound checks */ - dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n", status, ip, br_reason); dprintf2("si_signo: %d\n", si->si_signo); diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h index b3cb7670e026..254e5436bdd9 100644 --- a/tools/testing/selftests/x86/pkey-helpers.h +++ b/tools/testing/selftests/x86/pkey-helpers.h @@ -26,30 +26,26 @@ static inline void sigsafe_printf(const char *format, ...) { va_list ap; - va_start(ap, format); if (!dprint_in_signal) { + va_start(ap, format); vprintf(format, ap); + va_end(ap); } else { int ret; - int len = vsnprintf(dprint_in_signal_buffer, - DPRINT_IN_SIGNAL_BUF_SIZE, - format, ap); /* - * len is amount that would have been printed, - * but actual write is truncated at BUF_SIZE. + * No printf() functions are signal-safe. + * They deadlock easily. Write the format + * string to get some output, even if + * incomplete. */ - if (len > DPRINT_IN_SIGNAL_BUF_SIZE) - len = DPRINT_IN_SIGNAL_BUF_SIZE; - ret = write(1, dprint_in_signal_buffer, len); + ret = write(1, format, strlen(format)); if (ret < 0) - abort(); + exit(1); } - va_end(ap); } #define dprintf_level(level, args...) do { \ if (level <= DEBUG_LEVEL) \ sigsafe_printf(args); \ - fflush(NULL); \ } while (0) #define dprintf0(args...) dprintf_level(0, args) #define dprintf1(args...) dprintf_level(1, args) diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index f15aa5a76fe3..460b4bdf4c1e 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -72,10 +72,9 @@ extern void abort_hooks(void); test_nr, iteration_nr); \ dprintf0("errno at assert: %d", errno); \ abort_hooks(); \ - assert(condition); \ + exit(__LINE__); \ } \ } while (0) -#define raw_assert(cond) assert(cond) void cat_into_file(char *str, char *file) { @@ -87,12 +86,17 @@ void cat_into_file(char *str, char *file) * these need to be raw because they are called under * pkey_assert() */ - raw_assert(fd >= 0); + if (fd < 0) { + fprintf(stderr, "error opening '%s'\n", str); + perror("error: "); + exit(__LINE__); + } + ret = write(fd, str, strlen(str)); if (ret != strlen(str)) { perror("write to file failed"); fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); - raw_assert(0); + exit(__LINE__); } close(fd); } @@ -191,26 +195,30 @@ void lots_o_noops_around_write(int *write_to_me) #ifdef __i386__ #ifndef SYS_mprotect_key -# define SYS_mprotect_key 380 +# define SYS_mprotect_key 380 #endif + #ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 381 -# define SYS_pkey_free 382 +# define SYS_pkey_alloc 381 +# define SYS_pkey_free 382 #endif -#define REG_IP_IDX REG_EIP -#define si_pkey_offset 0x14 + +#define REG_IP_IDX REG_EIP +#define si_pkey_offset 0x14 #else #ifndef SYS_mprotect_key -# define SYS_mprotect_key 329 +# define SYS_mprotect_key 329 #endif + #ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 330 -# define SYS_pkey_free 331 +# define SYS_pkey_alloc 330 +# define SYS_pkey_free 331 #endif -#define REG_IP_IDX REG_RIP -#define si_pkey_offset 0x20 + +#define REG_IP_IDX REG_RIP +#define si_pkey_offset 0x20 #endif @@ -225,8 +233,14 @@ void dump_mem(void *dumpme, int len_bytes) } } -#define SEGV_BNDERR 3 /* failed address bound checks */ -#define SEGV_PKUERR 4 +/* Failed address bound checks: */ +#ifndef SEGV_BNDERR +# define SEGV_BNDERR 3 +#endif + +#ifndef SEGV_PKUERR +# define SEGV_PKUERR 4 +#endif static char *si_code_str(int si_code) { @@ -289,13 +303,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) dump_mem(pkru_ptr - 128, 256); pkey_assert(*pkru_ptr); - si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); - dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); - dump_mem(si_pkey_ptr - 8, 24); - siginfo_pkey = *si_pkey_ptr; - pkey_assert(siginfo_pkey < NR_PKEYS); - last_si_pkey = siginfo_pkey; - if ((si->si_code == SEGV_MAPERR) || (si->si_code == SEGV_ACCERR) || (si->si_code == SEGV_BNDERR)) { @@ -303,6 +310,13 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) exit(4); } + si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); + dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); + dump_mem((u8 *)si_pkey_ptr - 8, 24); + siginfo_pkey = *si_pkey_ptr; + pkey_assert(siginfo_pkey < NR_PKEYS); + last_si_pkey = siginfo_pkey; + dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); /* need __rdpkru() version so we do not do shadow_pkru checking */ dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); @@ -311,22 +325,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); pkru_faults++; dprintf1("<<<<==================================================\n"); - return; - if (trapno == 14) { - fprintf(stderr, - "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", - trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(1); - } else { - fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(2); - } dprint_in_signal = 0; } @@ -393,10 +391,15 @@ pid_t fork_lazy_child(void) return forkret; } -#define PKEY_DISABLE_ACCESS 0x1 -#define PKEY_DISABLE_WRITE 0x2 +#ifndef PKEY_DISABLE_ACCESS +# define PKEY_DISABLE_ACCESS 0x1 +#endif + +#ifndef PKEY_DISABLE_WRITE +# define PKEY_DISABLE_WRITE 0x2 +#endif -u32 pkey_get(int pkey, unsigned long flags) +static u32 hw_pkey_get(int pkey, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); u32 pkru = __rdpkru(); @@ -418,7 +421,7 @@ u32 pkey_get(int pkey, unsigned long flags) return masked_pkru; } -int pkey_set(int pkey, unsigned long rights, unsigned long flags) +static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); u32 old_pkru = __rdpkru(); @@ -452,15 +455,15 @@ void pkey_disable_set(int pkey, int flags) pkey, flags); pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - pkey_rights = pkey_get(pkey, syscall_flags); + pkey_rights = hw_pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0); pkey_rights |= flags; - ret = pkey_set(pkey, pkey_rights, syscall_flags); + ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); assert(!ret); /*pkru and flags have the same format */ shadow_pkru |= flags << (pkey * 2); @@ -468,8 +471,8 @@ void pkey_disable_set(int pkey, int flags) pkey_assert(ret >= 0); - pkey_rights = pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey_rights = hw_pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); @@ -483,24 +486,24 @@ void pkey_disable_clear(int pkey, int flags) { unsigned long syscall_flags = 0; int ret; - int pkey_rights = pkey_get(pkey, syscall_flags); + int pkey_rights = hw_pkey_get(pkey, syscall_flags); u32 orig_pkru = rdpkru(); pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0); pkey_rights |= flags; - ret = pkey_set(pkey, pkey_rights, 0); + ret = hw_pkey_set(pkey, pkey_rights, 0); /* pkru and flags have the same format */ shadow_pkru &= ~(flags << (pkey * 2)); pkey_assert(ret >= 0); - pkey_rights = pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey_rights = hw_pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); @@ -674,10 +677,12 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, struct pkey_malloc_record { void *ptr; long size; + int prot; }; struct pkey_malloc_record *pkey_malloc_records; +struct pkey_malloc_record *pkey_last_malloc_record; long nr_pkey_malloc_records; -void record_pkey_malloc(void *ptr, long size) +void record_pkey_malloc(void *ptr, long size, int prot) { long i; struct pkey_malloc_record *rec = NULL; @@ -709,6 +714,8 @@ void record_pkey_malloc(void *ptr, long size) (int)(rec - pkey_malloc_records), rec, ptr, size); rec->ptr = ptr; rec->size = size; + rec->prot = prot; + pkey_last_malloc_record = rec; nr_pkey_malloc_records++; } @@ -753,7 +760,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) pkey_assert(ptr != (void *)-1); ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); pkey_assert(!ret); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); rdpkru(); dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); @@ -774,7 +781,7 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) size = ALIGN_UP(size, HPAGE_SIZE * 2); ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); pkey_assert(ptr != (void *)-1); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); mprotect_pkey(ptr, size, prot, pkey); dprintf1("unaligned ptr: %p\n", ptr); @@ -847,7 +854,7 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) pkey_assert(ptr != (void *)-1); mprotect_pkey(ptr, size, prot, pkey); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); return ptr; @@ -869,7 +876,7 @@ void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) mprotect_pkey(ptr, size, prot, pkey); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); close(fd); @@ -918,13 +925,21 @@ void *malloc_pkey(long size, int prot, u16 pkey) } int last_pkru_faults; +#define UNKNOWN_PKEY -2 void expected_pk_fault(int pkey) { dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", __func__, last_pkru_faults, pkru_faults); dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); pkey_assert(last_pkru_faults + 1 == pkru_faults); - pkey_assert(last_si_pkey == pkey); + + /* + * For exec-only memory, we do not know the pkey in + * advance, so skip this check. + */ + if (pkey != UNKNOWN_PKEY) + pkey_assert(last_si_pkey == pkey); + /* * The signal handler shold have cleared out PKRU to let the * test program continue. We now have to restore it. @@ -939,10 +954,11 @@ void expected_pk_fault(int pkey) last_si_pkey = -1; } -void do_not_expect_pk_fault(void) -{ - pkey_assert(last_pkru_faults == pkru_faults); -} +#define do_not_expect_pk_fault(msg) do { \ + if (last_pkru_faults != pkru_faults) \ + dprintf0("unexpected PK fault: %s\n", msg); \ + pkey_assert(last_pkru_faults == pkru_faults); \ +} while (0) int test_fds[10] = { -1 }; int nr_test_fds; @@ -1151,12 +1167,15 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) pkey_assert(i < NR_PKEYS*2); /* - * There are 16 pkeys supported in hardware. One is taken - * up for the default (0) and another can be taken up by - * an execute-only mapping. Ensure that we can allocate - * at least 14 (16-2). + * There are 16 pkeys supported in hardware. Three are + * allocated by the time we get here: + * 1. The default key (0) + * 2. One possibly consumed by an execute-only mapping. + * 3. One allocated by the test code and passed in via + * 'pkey' to this function. + * Ensure that we can allocate at least another 13 (16-3). */ - pkey_assert(i >= NR_PKEYS-2); + pkey_assert(i >= NR_PKEYS-3); for (i = 0; i < nr_allocated_pkeys; i++) { err = sys_pkey_free(allocated_pkeys[i]); @@ -1165,6 +1184,35 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) } } +/* + * pkey 0 is special. It is allocated by default, so you do not + * have to call pkey_alloc() to use it first. Make sure that it + * is usable. + */ +void test_mprotect_with_pkey_0(int *ptr, u16 pkey) +{ + long size; + int prot; + + assert(pkey_last_malloc_record); + size = pkey_last_malloc_record->size; + /* + * This is a bit of a hack. But mprotect() requires + * huge-page-aligned sizes when operating on hugetlbfs. + * So, make sure that we use something that's a multiple + * of a huge page when we can. + */ + if (size >= HPAGE_SIZE) + size = HPAGE_SIZE; + prot = pkey_last_malloc_record->prot; + + /* Use pkey 0 */ + mprotect_pkey(ptr, size, prot, 0); + + /* Make sure that we can set it back to the original pkey. */ + mprotect_pkey(ptr, size, prot, pkey); +} + void test_ptrace_of_child(int *ptr, u16 pkey) { __attribute__((__unused__)) int peek_result; @@ -1228,7 +1276,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) pkey_assert(ret != -1); /* Now access from the current task, and expect NO exception: */ peek_result = read_ptr(plain_ptr); - do_not_expect_pk_fault(); + do_not_expect_pk_fault("read plain pointer after ptrace"); ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); pkey_assert(ret != -1); @@ -1241,12 +1289,9 @@ void test_ptrace_of_child(int *ptr, u16 pkey) free(plain_ptr_unaligned); } -void test_executing_on_unreadable_memory(int *ptr, u16 pkey) +void *get_pointer_to_instructions(void) { void *p1; - int scratch; - int ptr_contents; - int ret; p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); @@ -1256,7 +1301,23 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) /* Point 'p1' at the *second* page of the function: */ p1 += PAGE_SIZE; + /* + * Try to ensure we fault this in on next touch to ensure + * we get an instruction fault as opposed to a data one + */ madvise(p1, PAGE_SIZE, MADV_DONTNEED); + + return p1; +} + +void test_executing_on_unreadable_memory(int *ptr, u16 pkey) +{ + void *p1; + int scratch; + int ptr_contents; + int ret; + + p1 = get_pointer_to_instructions(); lots_o_noops_around_write(&scratch); ptr_contents = read_ptr(p1); dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); @@ -1272,12 +1333,55 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) */ madvise(p1, PAGE_SIZE, MADV_DONTNEED); lots_o_noops_around_write(&scratch); - do_not_expect_pk_fault(); + do_not_expect_pk_fault("executing on PROT_EXEC memory"); ptr_contents = read_ptr(p1); dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); expected_pk_fault(pkey); } +void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) +{ + void *p1; + int scratch; + int ptr_contents; + int ret; + + dprintf1("%s() start\n", __func__); + + p1 = get_pointer_to_instructions(); + lots_o_noops_around_write(&scratch); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + + /* Use a *normal* mprotect(), not mprotect_pkey(): */ + ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); + pkey_assert(!ret); + + dprintf2("pkru: %x\n", rdpkru()); + + /* Make sure this is an *instruction* fault */ + madvise(p1, PAGE_SIZE, MADV_DONTNEED); + lots_o_noops_around_write(&scratch); + do_not_expect_pk_fault("executing on PROT_EXEC memory"); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + expected_pk_fault(UNKNOWN_PKEY); + + /* + * Put the memory back to non-PROT_EXEC. Should clear the + * exec-only pkey off the VMA and allow it to be readable + * again. Go to PROT_NONE first to check for a kernel bug + * that did not clear the pkey when doing PROT_NONE. + */ + ret = mprotect(p1, PAGE_SIZE, PROT_NONE); + pkey_assert(!ret); + + ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC); + pkey_assert(!ret); + ptr_contents = read_ptr(p1); + do_not_expect_pk_fault("plain read on recently PROT_EXEC area"); +} + void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) { int size = PAGE_SIZE; @@ -1302,6 +1406,8 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { test_kernel_gup_of_access_disabled_region, test_kernel_gup_write_to_write_disabled_region, test_executing_on_unreadable_memory, + test_implicit_mprotect_exec_only_memory, + test_mprotect_with_pkey_0, test_ptrace_of_child, test_pkey_syscalls_on_non_allocated_pkey, test_pkey_syscalls_bad_args, diff --git a/tools/testing/selftests/x86/test_syscall_vdso.c b/tools/testing/selftests/x86/test_syscall_vdso.c index 40370354d4c1..c9c3281077bc 100644 --- a/tools/testing/selftests/x86/test_syscall_vdso.c +++ b/tools/testing/selftests/x86/test_syscall_vdso.c @@ -100,12 +100,19 @@ asm ( " shl $32, %r8\n" " orq $0x7f7f7f7f, %r8\n" " movq %r8, %r9\n" - " movq %r8, %r10\n" - " movq %r8, %r11\n" - " movq %r8, %r12\n" - " movq %r8, %r13\n" - " movq %r8, %r14\n" - " movq %r8, %r15\n" + " incq %r9\n" + " movq %r9, %r10\n" + " incq %r10\n" + " movq %r10, %r11\n" + " incq %r11\n" + " movq %r11, %r12\n" + " incq %r12\n" + " movq %r12, %r13\n" + " incq %r13\n" + " movq %r13, %r14\n" + " incq %r14\n" + " movq %r14, %r15\n" + " incq %r15\n" " ret\n" " .code32\n" " .popsection\n" @@ -128,12 +135,13 @@ int check_regs64(void) int err = 0; int num = 8; uint64_t *r64 = ®s64.r8; + uint64_t expected = 0x7f7f7f7f7f7f7f7fULL; if (!kernel_is_64bit) return 0; do { - if (*r64 == 0x7f7f7f7f7f7f7f7fULL) + if (*r64 == expected++) continue; /* register did not change */ if (syscall_addr != (long)&int80) { /* @@ -147,18 +155,17 @@ int check_regs64(void) continue; } } else { - /* INT80 syscall entrypoint can be used by + /* + * INT80 syscall entrypoint can be used by * 64-bit programs too, unlike SYSCALL/SYSENTER. * Therefore it must preserve R12+ * (they are callee-saved registers in 64-bit C ABI). * - * This was probably historically not intended, - * but R8..11 are clobbered (cleared to 0). - * IOW: they are the only registers which aren't - * preserved across INT80 syscall. + * Starting in Linux 4.17 (and any kernel that + * backports the change), R8..11 are preserved. + * Historically (and probably unintentionally), they + * were clobbered or zeroed. */ - if (*r64 == 0 && num <= 11) - continue; } printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64); err++; |