diff options
Diffstat (limited to 'tools/testing')
248 files changed, 28780 insertions, 1757 deletions
diff --git a/tools/testing/ktest/config-bisect.pl b/tools/testing/ktest/config-bisect.pl new file mode 100755 index 000000000000..b28feea7c363 --- /dev/null +++ b/tools/testing/ktest/config-bisect.pl @@ -0,0 +1,770 @@ +#!/usr/bin/perl -w +# +# Copyright 2015 - Steven Rostedt, Red Hat Inc. +# Copyright 2017 - Steven Rostedt, VMware, Inc. +# +# Licensed under the terms of the GNU GPL License version 2 +# + +# usage: +# config-bisect.pl [options] good-config bad-config [good|bad] +# + +# Compares a good config to a bad config, then takes half of the diffs +# and produces a config that is somewhere between the good config and +# the bad config. That is, the resulting config will start with the +# good config and will try to make half of the differences of between +# the good and bad configs match the bad config. It tries because of +# dependencies between the two configs it may not be able to change +# exactly half of the configs that are different between the two config +# files. + +# Here's a normal way to use it: +# +# $ cd /path/to/linux/kernel +# $ config-bisect.pl /path/to/good/config /path/to/bad/config + +# This will now pull in good config (blowing away .config in that directory +# so do not make that be one of the good or bad configs), and then +# build the config with "make oldconfig" to make sure it matches the +# current kernel. It will then store the configs in that result for +# the good config. It does the same for the bad config as well. +# The algorithm will run, merging half of the differences between +# the two configs and building them with "make oldconfig" to make sure +# the result changes (dependencies may reset changes the tool had made). +# It then copies the result of its good config to /path/to/good/config.tmp +# and the bad config to /path/to/bad/config.tmp (just appends ".tmp" to the +# files passed in). And the ".config" that you should test will be in +# directory + +# After the first run, determine if the result is good or bad then +# run the same command appending the result + +# For good results: +# $ config-bisect.pl /path/to/good/config /path/to/bad/config good + +# For bad results: +# $ config-bisect.pl /path/to/good/config /path/to/bad/config bad + +# Do not change the good-config or bad-config, config-bisect.pl will +# copy the good-config to a temp file with the same name as good-config +# but with a ".tmp" after it. It will do the same with the bad-config. + +# If "good" or "bad" is not stated at the end, it will copy the good and +# bad configs to the .tmp versions. If a .tmp version already exists, it will +# warn before writing over them (-r will not warn, and just write over them). +# If the last config is labeled "good", then it will copy it to the good .tmp +# version. If the last config is labeled "bad", it will copy it to the bad +# .tmp version. It will continue this until it can not merge the two any more +# without the result being equal to either the good or bad .tmp configs. + +my $start = 0; +my $val = ""; + +my $pwd = `pwd`; +chomp $pwd; +my $tree = $pwd; +my $build; + +my $output_config; +my $reset_bisect; + +sub usage { + print << "EOF" + +usage: config-bisect.pl [-l linux-tree][-b build-dir] good-config bad-config [good|bad] + -l [optional] define location of linux-tree (default is current directory) + -b [optional] define location to build (O=build-dir) (default is linux-tree) + good-config the config that is considered good + bad-config the config that does not work + "good" add this if the last run produced a good config + "bad" add this if the last run produced a bad config + If "good" or "bad" is not specified, then it is the start of a new bisect + + Note, each run will create copy of good and bad configs with ".tmp" appended. + +EOF +; + + exit(-1); +} + +sub doprint { + print @_; +} + +sub dodie { + doprint "CRITICAL FAILURE... ", @_, "\n"; + + die @_, "\n"; +} + +sub expand_path { + my ($file) = @_; + + if ($file =~ m,^/,) { + return $file; + } + return "$pwd/$file"; +} + +sub read_prompt { + my ($cancel, $prompt) = @_; + + my $ans; + + for (;;) { + if ($cancel) { + print "$prompt [y/n/C] "; + } else { + print "$prompt [y/N] "; + } + $ans = <STDIN>; + chomp $ans; + if ($ans =~ /^\s*$/) { + if ($cancel) { + $ans = "c"; + } else { + $ans = "n"; + } + } + last if ($ans =~ /^y$/i || $ans =~ /^n$/i); + if ($cancel) { + last if ($ans =~ /^c$/i); + print "Please answer either 'y', 'n' or 'c'.\n"; + } else { + print "Please answer either 'y' or 'n'.\n"; + } + } + if ($ans =~ /^c/i) { + exit; + } + if ($ans !~ /^y$/i) { + return 0; + } + return 1; +} + +sub read_yn { + my ($prompt) = @_; + + return read_prompt 0, $prompt; +} + +sub read_ync { + my ($prompt) = @_; + + return read_prompt 1, $prompt; +} + +sub run_command { + my ($command, $redirect) = @_; + my $start_time; + my $end_time; + my $dord = 0; + my $pid; + + $start_time = time; + + doprint("$command ... "); + + $pid = open(CMD, "$command 2>&1 |") or + dodie "unable to exec $command"; + + if (defined($redirect)) { + open (RD, ">$redirect") or + dodie "failed to write to redirect $redirect"; + $dord = 1; + } + + while (<CMD>) { + print RD if ($dord); + } + + waitpid($pid, 0); + my $failed = $?; + + close(CMD); + close(RD) if ($dord); + + $end_time = time; + my $delta = $end_time - $start_time; + + if ($delta == 1) { + doprint "[1 second] "; + } else { + doprint "[$delta seconds] "; + } + + if ($failed) { + doprint "FAILED!\n"; + } else { + doprint "SUCCESS\n"; + } + + return !$failed; +} + +###### CONFIG BISECT ###### + +# config_ignore holds the configs that were set (or unset) for +# a good config and we will ignore these configs for the rest +# of a config bisect. These configs stay as they were. +my %config_ignore; + +# config_set holds what all configs were set as. +my %config_set; + +# config_off holds the set of configs that the bad config had disabled. +# We need to record them and set them in the .config when running +# olddefconfig, because olddefconfig keeps the defaults. +my %config_off; + +# config_off_tmp holds a set of configs to turn off for now +my @config_off_tmp; + +# config_list is the set of configs that are being tested +my %config_list; +my %null_config; + +my %dependency; + +my $make; + +sub make_oldconfig { + + if (!run_command "$make olddefconfig") { + # Perhaps olddefconfig doesn't exist in this version of the kernel + # try oldnoconfig + doprint "olddefconfig failed, trying make oldnoconfig\n"; + if (!run_command "$make oldnoconfig") { + doprint "oldnoconfig failed, trying yes '' | make oldconfig\n"; + # try a yes '' | oldconfig + run_command "yes '' | $make oldconfig" or + dodie "failed make config oldconfig"; + } + } +} + +sub assign_configs { + my ($hash, $config) = @_; + + doprint "Reading configs from $config\n"; + + open (IN, $config) + or dodie "Failed to read $config"; + + while (<IN>) { + chomp; + if (/^((CONFIG\S*)=.*)/) { + ${$hash}{$2} = $1; + } elsif (/^(# (CONFIG\S*) is not set)/) { + ${$hash}{$2} = $1; + } + } + + close(IN); +} + +sub process_config_ignore { + my ($config) = @_; + + assign_configs \%config_ignore, $config; +} + +sub get_dependencies { + my ($config) = @_; + + my $arr = $dependency{$config}; + if (!defined($arr)) { + return (); + } + + my @deps = @{$arr}; + + foreach my $dep (@{$arr}) { + print "ADD DEP $dep\n"; + @deps = (@deps, get_dependencies $dep); + } + + return @deps; +} + +sub save_config { + my ($pc, $file) = @_; + + my %configs = %{$pc}; + + doprint "Saving configs into $file\n"; + + open(OUT, ">$file") or dodie "Can not write to $file"; + + foreach my $config (keys %configs) { + print OUT "$configs{$config}\n"; + } + close(OUT); +} + +sub create_config { + my ($name, $pc) = @_; + + doprint "Creating old config from $name configs\n"; + + save_config $pc, $output_config; + + make_oldconfig; +} + +# compare two config hashes, and return configs with different vals. +# It returns B's config values, but you can use A to see what A was. +sub diff_config_vals { + my ($pa, $pb) = @_; + + # crappy Perl way to pass in hashes. + my %a = %{$pa}; + my %b = %{$pb}; + + my %ret; + + foreach my $item (keys %a) { + if (defined($b{$item}) && $b{$item} ne $a{$item}) { + $ret{$item} = $b{$item}; + } + } + + return %ret; +} + +# compare two config hashes and return the configs in B but not A +sub diff_configs { + my ($pa, $pb) = @_; + + my %ret; + + # crappy Perl way to pass in hashes. + my %a = %{$pa}; + my %b = %{$pb}; + + foreach my $item (keys %b) { + if (!defined($a{$item})) { + $ret{$item} = $b{$item}; + } + } + + return %ret; +} + +# return if two configs are equal or not +# 0 is equal +1 b has something a does not +# +1 if a and b have a different item. +# -1 if a has something b does not +sub compare_configs { + my ($pa, $pb) = @_; + + my %ret; + + # crappy Perl way to pass in hashes. + my %a = %{$pa}; + my %b = %{$pb}; + + foreach my $item (keys %b) { + if (!defined($a{$item})) { + return 1; + } + if ($a{$item} ne $b{$item}) { + return 1; + } + } + + foreach my $item (keys %a) { + if (!defined($b{$item})) { + return -1; + } + } + + return 0; +} + +sub process_failed { + my ($config) = @_; + + doprint "\n\n***************************************\n"; + doprint "Found bad config: $config\n"; + doprint "***************************************\n\n"; +} + +sub process_new_config { + my ($tc, $nc, $gc, $bc) = @_; + + my %tmp_config = %{$tc}; + my %good_configs = %{$gc}; + my %bad_configs = %{$bc}; + + my %new_configs; + + my $runtest = 1; + my $ret; + + create_config "tmp_configs", \%tmp_config; + assign_configs \%new_configs, $output_config; + + $ret = compare_configs \%new_configs, \%bad_configs; + if (!$ret) { + doprint "New config equals bad config, try next test\n"; + $runtest = 0; + } + + if ($runtest) { + $ret = compare_configs \%new_configs, \%good_configs; + if (!$ret) { + doprint "New config equals good config, try next test\n"; + $runtest = 0; + } + } + + %{$nc} = %new_configs; + + return $runtest; +} + +sub convert_config { + my ($config) = @_; + + if ($config =~ /^# (.*) is not set/) { + $config = "$1=n"; + } + + $config =~ s/^CONFIG_//; + return $config; +} + +sub print_config { + my ($sym, $config) = @_; + + $config = convert_config $config; + doprint "$sym$config\n"; +} + +sub print_config_compare { + my ($good_config, $bad_config) = @_; + + $good_config = convert_config $good_config; + $bad_config = convert_config $bad_config; + + my $good_value = $good_config; + my $bad_value = $bad_config; + $good_value =~ s/(.*)=//; + my $config = $1; + + $bad_value =~ s/.*=//; + + doprint " $config $good_value -> $bad_value\n"; +} + +# Pass in: +# $phalf: half of the configs names you want to add +# $oconfigs: The orginial configs to start with +# $sconfigs: The source to update $oconfigs with (from $phalf) +# $which: The name of which half that is updating (top / bottom) +# $type: The name of the source type (good / bad) +sub make_half { + my ($phalf, $oconfigs, $sconfigs, $which, $type) = @_; + + my @half = @{$phalf}; + my %orig_configs = %{$oconfigs}; + my %source_configs = %{$sconfigs}; + + my %tmp_config = %orig_configs; + + doprint "Settings bisect with $which half of $type configs:\n"; + foreach my $item (@half) { + doprint "Updating $item to $source_configs{$item}\n"; + $tmp_config{$item} = $source_configs{$item}; + } + + return %tmp_config; +} + +sub run_config_bisect { + my ($pgood, $pbad) = @_; + + my %good_configs = %{$pgood}; + my %bad_configs = %{$pbad}; + + my %diff_configs = diff_config_vals \%good_configs, \%bad_configs; + my %b_configs = diff_configs \%good_configs, \%bad_configs; + my %g_configs = diff_configs \%bad_configs, \%good_configs; + + # diff_arr is what is in both good and bad but are different (y->n) + my @diff_arr = keys %diff_configs; + my $len_diff = $#diff_arr + 1; + + # b_arr is what is in bad but not in good (has depends) + my @b_arr = keys %b_configs; + my $len_b = $#b_arr + 1; + + # g_arr is what is in good but not in bad + my @g_arr = keys %g_configs; + my $len_g = $#g_arr + 1; + + my $runtest = 0; + my %new_configs; + my $ret; + + # Look at the configs that are different between good and bad. + # This does not include those that depend on other configs + # (configs depending on other configs that are not set would + # not show up even as a "# CONFIG_FOO is not set" + + + doprint "# of configs to check: $len_diff\n"; + doprint "# of configs showing only in good: $len_g\n"; + doprint "# of configs showing only in bad: $len_b\n"; + + if ($len_diff > 0) { + # Now test for different values + + doprint "Configs left to check:\n"; + doprint " Good Config\t\t\tBad Config\n"; + doprint " -----------\t\t\t----------\n"; + foreach my $item (@diff_arr) { + doprint " $good_configs{$item}\t$bad_configs{$item}\n"; + } + + my $half = int($#diff_arr / 2); + my @tophalf = @diff_arr[0 .. $half]; + + doprint "Set tmp config to be good config with some bad config values\n"; + + my %tmp_config = make_half \@tophalf, \%good_configs, + \%bad_configs, "top", "bad"; + + $runtest = process_new_config \%tmp_config, \%new_configs, + \%good_configs, \%bad_configs; + + if (!$runtest) { + doprint "Set tmp config to be bad config with some good config values\n"; + + my %tmp_config = make_half \@tophalf, \%bad_configs, + \%good_configs, "top", "good"; + + $runtest = process_new_config \%tmp_config, \%new_configs, + \%good_configs, \%bad_configs; + } + } + + if (!$runtest && $len_diff > 0) { + # do the same thing, but this time with bottom half + + my $half = int($#diff_arr / 2); + my @bottomhalf = @diff_arr[$half+1 .. $#diff_arr]; + + doprint "Set tmp config to be good config with some bad config values\n"; + + my %tmp_config = make_half \@bottomhalf, \%good_configs, + \%bad_configs, "bottom", "bad"; + + $runtest = process_new_config \%tmp_config, \%new_configs, + \%good_configs, \%bad_configs; + + if (!$runtest) { + doprint "Set tmp config to be bad config with some good config values\n"; + + my %tmp_config = make_half \@bottomhalf, \%bad_configs, + \%good_configs, "bottom", "good"; + + $runtest = process_new_config \%tmp_config, \%new_configs, + \%good_configs, \%bad_configs; + } + } + + if ($runtest) { + make_oldconfig; + doprint "READY TO TEST .config IN $build\n"; + return 0; + } + + doprint "\n%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n"; + doprint "Hmm, can't make any more changes without making good == bad?\n"; + doprint "Difference between good (+) and bad (-)\n"; + + foreach my $item (keys %bad_configs) { + if (!defined($good_configs{$item})) { + print_config "-", $bad_configs{$item}; + } + } + + foreach my $item (keys %good_configs) { + next if (!defined($bad_configs{$item})); + if ($good_configs{$item} ne $bad_configs{$item}) { + print_config_compare $good_configs{$item}, $bad_configs{$item}; + } + } + + foreach my $item (keys %good_configs) { + if (!defined($bad_configs{$item})) { + print_config "+", $good_configs{$item}; + } + } + return -1; +} + +sub config_bisect { + my ($good_config, $bad_config) = @_; + my $ret; + + my %good_configs; + my %bad_configs; + my %tmp_configs; + + doprint "Run good configs through make oldconfig\n"; + assign_configs \%tmp_configs, $good_config; + create_config "$good_config", \%tmp_configs; + assign_configs \%good_configs, $output_config; + + doprint "Run bad configs through make oldconfig\n"; + assign_configs \%tmp_configs, $bad_config; + create_config "$bad_config", \%tmp_configs; + assign_configs \%bad_configs, $output_config; + + save_config \%good_configs, $good_config; + save_config \%bad_configs, $bad_config; + + return run_config_bisect \%good_configs, \%bad_configs; +} + +while ($#ARGV >= 0) { + if ($ARGV[0] !~ m/^-/) { + last; + } + my $opt = shift @ARGV; + + if ($opt eq "-b") { + $val = shift @ARGV; + if (!defined($val)) { + die "-b requires value\n"; + } + $build = $val; + } + + elsif ($opt eq "-l") { + $val = shift @ARGV; + if (!defined($val)) { + die "-l requires value\n"; + } + $tree = $val; + } + + elsif ($opt eq "-r") { + $reset_bisect = 1; + } + + elsif ($opt eq "-h") { + usage; + } + + else { + die "Unknow option $opt\n"; + } +} + +$build = $tree if (!defined($build)); + +$tree = expand_path $tree; +$build = expand_path $build; + +if ( ! -d $tree ) { + die "$tree not a directory\n"; +} + +if ( ! -d $build ) { + die "$build not a directory\n"; +} + +usage if $#ARGV < 1; + +if ($#ARGV == 1) { + $start = 1; +} elsif ($#ARGV == 2) { + $val = $ARGV[2]; + if ($val ne "good" && $val ne "bad") { + die "Unknown command '$val', bust be either \"good\" or \"bad\"\n"; + } +} else { + usage; +} + +my $good_start = expand_path $ARGV[0]; +my $bad_start = expand_path $ARGV[1]; + +my $good = "$good_start.tmp"; +my $bad = "$bad_start.tmp"; + +$make = "make"; + +if ($build ne $tree) { + $make = "make O=$build" +} + +$output_config = "$build/.config"; + +if ($start) { + if ( ! -f $good_start ) { + die "$good_start not found\n"; + } + if ( ! -f $bad_start ) { + die "$bad_start not found\n"; + } + if ( -f $good || -f $bad ) { + my $p = ""; + + if ( -f $good ) { + $p = "$good exists\n"; + } + + if ( -f $bad ) { + $p = "$p$bad exists\n"; + } + + if (!defined($reset_bisect)) { + if (!read_yn "${p}Overwrite and start new bisect anyway?") { + exit (-1); + } + } + } + run_command "cp $good_start $good" or die "failed to copy to $good\n"; + run_command "cp $bad_start $bad" or die "faield to copy to $bad\n"; +} else { + if ( ! -f $good ) { + die "Can not find file $good\n"; + } + if ( ! -f $bad ) { + die "Can not find file $bad\n"; + } + if ($val eq "good") { + run_command "cp $output_config $good" or die "failed to copy $config to $good\n"; + } elsif ($val eq "bad") { + run_command "cp $output_config $bad" or die "failed to copy $config to $bad\n"; + } +} + +chdir $tree || die "can't change directory to $tree"; + +my $ret = config_bisect $good, $bad; + +if (!$ret) { + exit(0); +} + +if ($ret > 0) { + doprint "Cleaning temp files\n"; + run_command "rm $good"; + run_command "rm $bad"; + exit(1); +} else { + doprint "See good and bad configs for details:\n"; + doprint "good: $good\n"; + doprint "bad: $bad\n"; + doprint "%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n"; +} +exit(2); diff --git a/tools/testing/ktest/examples/crosstests.conf b/tools/testing/ktest/examples/crosstests.conf index a1203148dfa1..6907f32590b2 100644 --- a/tools/testing/ktest/examples/crosstests.conf +++ b/tools/testing/ktest/examples/crosstests.conf @@ -59,7 +59,7 @@ DO_DEFAULT := 1 # By setting both DO_FAILED and DO_DEFAULT to zero, you can pick a single # arch that you want to test. (uncomment RUN and chose your arch) -#RUN := m32r +#RUN := arm # At the bottom of the config file exists a bisect test. You can update that # test and set DO_FAILED and DO_DEFAULT to zero, and uncomment this variable @@ -106,33 +106,11 @@ TEST_START IF ${RUN} == arm || ${DO_DEFAULT} CROSS = arm-unknown-linux-gnueabi ARCH = arm -# black fin -TEST_START IF ${RUN} == bfin || ${DO_DEFAULT} -CROSS = bfin-uclinux -ARCH = blackfin -BUILD_OPTIONS = -j8 vmlinux - -# cris - FAILS? -TEST_START IF ${RUN} == cris || ${RUN} == cris64 || ${DO_FAILED} -CROSS = cris-linux -ARCH = cris - -# cris32 - not right arch? -TEST_START IF ${RUN} == cris || ${RUN} == cris32 || ${DO_FAILED} -CROSS = crisv32-linux -ARCH = cris - # ia64 TEST_START IF ${RUN} == ia64 || ${DO_DEFAULT} CROSS = ia64-linux ARCH = ia64 -# frv -TEST_START IF ${RUN} == frv || ${DO_FAILED} -CROSS = frv-linux -ARCH = frv -GCC_VER = 4.5.1 - # m68k fails with error? TEST_START IF ${RUN} == m68k || ${DO_DEFAULT} CROSS = m68k-linux @@ -148,13 +126,6 @@ TEST_START IF ${RUN} == mips || ${RUN} == mips32 || ${DO_DEFAULT} CROSS = mips-linux ARCH = mips -# m32r -TEST_START IF ${RUN} == m32r || ${DO_FAILED} -CROSS = m32r-linux -ARCH = m32r -GCC_VER = 4.5.1 -BUILD_OPTIONS = -j8 vmlinux - # parisc64 failed? TEST_START IF ${RUN} == hppa || ${RUN} == hppa64 || ${DO_FAILED} CROSS = hppa64-linux diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 0c8b61f8398e..87af8a68ab25 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -10,6 +10,7 @@ use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK); use File::Path qw(mkpath); use File::Copy qw(cp); use FileHandle; +use FindBin; my $VERSION = "0.2"; @@ -22,6 +23,11 @@ my %evals; #default opts my %default = ( + "MAILER" => "sendmail", # default mailer + "EMAIL_ON_ERROR" => 1, + "EMAIL_WHEN_FINISHED" => 1, + "EMAIL_WHEN_CANCELED" => 0, + "EMAIL_WHEN_STARTED" => 0, "NUM_TESTS" => 1, "TEST_TYPE" => "build", "BUILD_TYPE" => "randconfig", @@ -59,6 +65,7 @@ my %default = ( "GRUB_REBOOT" => "grub2-reboot", "SYSLINUX" => "extlinux", "SYSLINUX_PATH" => "/boot/extlinux", + "CONNECT_TIMEOUT" => 25, # required, and we will ask users if they don't have them but we keep the default # value something that is common. @@ -163,6 +170,8 @@ my $store_failures; my $store_successes; my $test_name; my $timeout; +my $connect_timeout; +my $config_bisect_exec; my $booted_timeout; my $detect_triplefault; my $console; @@ -204,6 +213,20 @@ my $install_time; my $reboot_time; my $test_time; +my $pwd; +my $dirname = $FindBin::Bin; + +my $mailto; +my $mailer; +my $mail_path; +my $mail_command; +my $email_on_error; +my $email_when_finished; +my $email_when_started; +my $email_when_canceled; + +my $script_start_time = localtime(); + # set when a test is something other that just building or install # which would require more options. my $buildonly = 1; @@ -229,6 +252,14 @@ my $no_reboot = 1; my $reboot_success = 0; my %option_map = ( + "MAILTO" => \$mailto, + "MAILER" => \$mailer, + "MAIL_PATH" => \$mail_path, + "MAIL_COMMAND" => \$mail_command, + "EMAIL_ON_ERROR" => \$email_on_error, + "EMAIL_WHEN_FINISHED" => \$email_when_finished, + "EMAIL_WHEN_STARTED" => \$email_when_started, + "EMAIL_WHEN_CANCELED" => \$email_when_canceled, "MACHINE" => \$machine, "SSH_USER" => \$ssh_user, "TMP_DIR" => \$tmpdir, @@ -296,6 +327,8 @@ my %option_map = ( "STORE_SUCCESSES" => \$store_successes, "TEST_NAME" => \$test_name, "TIMEOUT" => \$timeout, + "CONNECT_TIMEOUT" => \$connect_timeout, + "CONFIG_BISECT_EXEC" => \$config_bisect_exec, "BOOTED_TIMEOUT" => \$booted_timeout, "CONSOLE" => \$console, "CLOSE_CONSOLE_SIGNAL" => \$close_console_signal, @@ -337,6 +370,7 @@ my %used_options; # default variables that can be used chomp ($variable{"PWD"} = `pwd`); +$pwd = $variable{"PWD"}; $config_help{"MACHINE"} = << "EOF" The machine hostname that you will test. @@ -718,22 +752,14 @@ sub set_value { my $prvalue = process_variables($rvalue); - if ($buildonly && $lvalue =~ /^TEST_TYPE(\[.*\])?$/ && $prvalue ne "build") { + if ($lvalue =~ /^(TEST|BISECT|CONFIG_BISECT)_TYPE(\[.*\])?$/ && + $prvalue !~ /^(config_|)bisect$/ && + $prvalue !~ /^build$/ && + $buildonly) { + # Note if a test is something other than build, then we # will need other mandatory options. if ($prvalue ne "install") { - # for bisect, we need to check BISECT_TYPE - if ($prvalue ne "bisect") { - $buildonly = 0; - } - } else { - # install still limits some mandatory options. - $buildonly = 2; - } - } - - if ($buildonly && $lvalue =~ /^BISECT_TYPE(\[.*\])?$/ && $prvalue ne "build") { - if ($prvalue ne "install") { $buildonly = 0; } else { # install still limits some mandatory options. @@ -1140,7 +1166,8 @@ sub __read_config { sub get_test_case { print "What test case would you like to run?\n"; print " (build, install or boot)\n"; - print " Other tests are available but require editing the config file\n"; + print " Other tests are available but require editing ktest.conf\n"; + print " (see tools/testing/ktest/sample.conf)\n"; my $ans = <STDIN>; chomp $ans; $default{"TEST_TYPE"} = $ans; @@ -1328,8 +1355,8 @@ sub reboot { my ($time) = @_; my $powercycle = 0; - # test if the machine can be connected to within 5 seconds - my $stat = run_ssh("echo check machine status", 5); + # test if the machine can be connected to within a few seconds + my $stat = run_ssh("echo check machine status", $connect_timeout); if (!$stat) { doprint("power cycle\n"); $powercycle = 1; @@ -1404,10 +1431,18 @@ sub do_not_reboot { return $test_type eq "build" || $no_reboot || ($test_type eq "patchcheck" && $opt{"PATCHCHECK_TYPE[$i]"} eq "build") || - ($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build"); + ($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build") || + ($test_type eq "config_bisect" && $opt{"CONFIG_BISECT_TYPE[$i]"} eq "build"); } +my $in_die = 0; + sub dodie { + + # avoid recusion + return if ($in_die); + $in_die = 1; + doprint "CRITICAL FAILURE... ", @_, "\n"; my $i = $iteration; @@ -1426,6 +1461,11 @@ sub dodie { print " See $opt{LOG_FILE} for more info.\n"; } + if ($email_on_error) { + send_email("KTEST: critical failure for your [$test_type] test", + "Your test started at $script_start_time has failed with:\n@_\n"); + } + if ($monitor_cnt) { # restore terminal settings system("stty $stty_orig"); @@ -1477,7 +1517,7 @@ sub exec_console { close($pts); exec $console or - die "Can't open console $console"; + dodie "Can't open console $console"; } sub open_console { @@ -1515,6 +1555,9 @@ sub close_console { doprint "kill child process $pid\n"; kill $close_console_signal, $pid; + doprint "wait for child process $pid to exit\n"; + waitpid($pid, 0); + print "closing!\n"; close($fp); @@ -1625,7 +1668,7 @@ sub save_logs { if (!-d $dir) { mkpath($dir) or - die "can't create $dir"; + dodie "can't create $dir"; } my %files = ( @@ -1638,7 +1681,7 @@ sub save_logs { while (my ($name, $source) = each(%files)) { if (-f "$source") { cp "$source", "$dir/$name" or - die "failed to copy $source"; + dodie "failed to copy $source"; } } @@ -1692,6 +1735,7 @@ sub run_command { my $end_time; my $dolog = 0; my $dord = 0; + my $dostdout = 0; my $pid; $command =~ s/\$SSH_USER/$ssh_user/g; @@ -1710,9 +1754,15 @@ sub run_command { } if (defined($redirect)) { - open (RD, ">$redirect") or - dodie "failed to write to redirect $redirect"; - $dord = 1; + if ($redirect eq 1) { + $dostdout = 1; + # Have the output of the command on its own line + doprint "\n"; + } else { + open (RD, ">$redirect") or + dodie "failed to write to redirect $redirect"; + $dord = 1; + } } my $hit_timeout = 0; @@ -1734,6 +1784,7 @@ sub run_command { } print LOG $line if ($dolog); print RD $line if ($dord); + print $line if ($dostdout); } waitpid($pid, 0); @@ -1812,7 +1863,7 @@ sub get_grub2_index { $ssh_grub =~ s,\$SSH_COMMAND,cat $grub_file,g; open(IN, "$ssh_grub |") - or die "unable to get $grub_file"; + or dodie "unable to get $grub_file"; my $found = 0; @@ -1821,13 +1872,13 @@ sub get_grub2_index { $grub_number++; $found = 1; last; - } elsif (/^menuentry\s/) { + } elsif (/^menuentry\s|^submenu\s/) { $grub_number++; } } close(IN); - die "Could not find '$grub_menu' in $grub_file on $machine" + dodie "Could not find '$grub_menu' in $grub_file on $machine" if (!$found); doprint "$grub_number\n"; $last_grub_menu = $grub_menu; @@ -1855,7 +1906,7 @@ sub get_grub_index { $ssh_grub =~ s,\$SSH_COMMAND,cat /boot/grub/menu.lst,g; open(IN, "$ssh_grub |") - or die "unable to get menu.lst"; + or dodie "unable to get menu.lst"; my $found = 0; @@ -1870,7 +1921,7 @@ sub get_grub_index { } close(IN); - die "Could not find '$grub_menu' in /boot/grub/menu on $machine" + dodie "Could not find '$grub_menu' in /boot/grub/menu on $machine" if (!$found); doprint "$grub_number\n"; $last_grub_menu = $grub_menu; @@ -1983,7 +2034,7 @@ sub monitor { my $full_line = ""; open(DMESG, "> $dmesg") or - die "unable to write to $dmesg"; + dodie "unable to write to $dmesg"; reboot_to; @@ -2862,7 +2913,7 @@ sub run_bisect { sub update_bisect_replay { my $tmp_log = "$tmpdir/ktest_bisect_log"; run_command "git bisect log > $tmp_log" or - die "can't create bisect log"; + dodie "can't create bisect log"; return $tmp_log; } @@ -2871,9 +2922,9 @@ sub bisect { my $result; - die "BISECT_GOOD[$i] not defined\n" if (!defined($bisect_good)); - die "BISECT_BAD[$i] not defined\n" if (!defined($bisect_bad)); - die "BISECT_TYPE[$i] not defined\n" if (!defined($bisect_type)); + dodie "BISECT_GOOD[$i] not defined\n" if (!defined($bisect_good)); + dodie "BISECT_BAD[$i] not defined\n" if (!defined($bisect_bad)); + dodie "BISECT_TYPE[$i] not defined\n" if (!defined($bisect_type)); my $good = $bisect_good; my $bad = $bisect_bad; @@ -2936,7 +2987,7 @@ sub bisect { if ($check ne "good") { doprint "TESTING BISECT BAD [$bad]\n"; run_command "git checkout $bad" or - die "Failed to checkout $bad"; + dodie "Failed to checkout $bad"; $result = run_bisect $type; @@ -2948,7 +2999,7 @@ sub bisect { if ($check ne "bad") { doprint "TESTING BISECT GOOD [$good]\n"; run_command "git checkout $good" or - die "Failed to checkout $good"; + dodie "Failed to checkout $good"; $result = run_bisect $type; @@ -2959,7 +3010,7 @@ sub bisect { # checkout where we started run_command "git checkout $head" or - die "Failed to checkout $head"; + dodie "Failed to checkout $head"; } run_command "git bisect start$start_files" or @@ -3092,76 +3143,6 @@ sub create_config { make_oldconfig; } -# compare two config hashes, and return configs with different vals. -# It returns B's config values, but you can use A to see what A was. -sub diff_config_vals { - my ($pa, $pb) = @_; - - # crappy Perl way to pass in hashes. - my %a = %{$pa}; - my %b = %{$pb}; - - my %ret; - - foreach my $item (keys %a) { - if (defined($b{$item}) && $b{$item} ne $a{$item}) { - $ret{$item} = $b{$item}; - } - } - - return %ret; -} - -# compare two config hashes and return the configs in B but not A -sub diff_configs { - my ($pa, $pb) = @_; - - my %ret; - - # crappy Perl way to pass in hashes. - my %a = %{$pa}; - my %b = %{$pb}; - - foreach my $item (keys %b) { - if (!defined($a{$item})) { - $ret{$item} = $b{$item}; - } - } - - return %ret; -} - -# return if two configs are equal or not -# 0 is equal +1 b has something a does not -# +1 if a and b have a different item. -# -1 if a has something b does not -sub compare_configs { - my ($pa, $pb) = @_; - - my %ret; - - # crappy Perl way to pass in hashes. - my %a = %{$pa}; - my %b = %{$pb}; - - foreach my $item (keys %b) { - if (!defined($a{$item})) { - return 1; - } - if ($a{$item} ne $b{$item}) { - return 1; - } - } - - foreach my $item (keys %a) { - if (!defined($b{$item})) { - return -1; - } - } - - return 0; -} - sub run_config_bisect_test { my ($type) = @_; @@ -3174,166 +3155,57 @@ sub run_config_bisect_test { return $ret; } -sub process_failed { - my ($config) = @_; +sub config_bisect_end { + my ($good, $bad) = @_; + my $diffexec = "diff -u"; + if (-f "$builddir/scripts/diffconfig") { + $diffexec = "$builddir/scripts/diffconfig"; + } doprint "\n\n***************************************\n"; - doprint "Found bad config: $config\n"; + doprint "No more config bisecting possible.\n"; + run_command "$diffexec $good $bad", 1; doprint "***************************************\n\n"; } -# used for config bisecting -my $good_config; -my $bad_config; - -sub process_new_config { - my ($tc, $nc, $gc, $bc) = @_; - - my %tmp_config = %{$tc}; - my %good_configs = %{$gc}; - my %bad_configs = %{$bc}; - - my %new_configs; - - my $runtest = 1; - my $ret; - - create_config "tmp_configs", \%tmp_config; - assign_configs \%new_configs, $output_config; - - $ret = compare_configs \%new_configs, \%bad_configs; - if (!$ret) { - doprint "New config equals bad config, try next test\n"; - $runtest = 0; - } - - if ($runtest) { - $ret = compare_configs \%new_configs, \%good_configs; - if (!$ret) { - doprint "New config equals good config, try next test\n"; - $runtest = 0; - } - } - - %{$nc} = %new_configs; - - return $runtest; -} - sub run_config_bisect { - my ($pgood, $pbad) = @_; - - my $type = $config_bisect_type; - - my %good_configs = %{$pgood}; - my %bad_configs = %{$pbad}; - - my %diff_configs = diff_config_vals \%good_configs, \%bad_configs; - my %b_configs = diff_configs \%good_configs, \%bad_configs; - my %g_configs = diff_configs \%bad_configs, \%good_configs; - - my @diff_arr = keys %diff_configs; - my $len_diff = $#diff_arr + 1; - - my @b_arr = keys %b_configs; - my $len_b = $#b_arr + 1; - - my @g_arr = keys %g_configs; - my $len_g = $#g_arr + 1; - - my $runtest = 1; - my %new_configs; + my ($good, $bad, $last_result) = @_; + my $reset = ""; + my $cmd; my $ret; - # First, lets get it down to a single subset. - # Is the problem with a difference in values? - # Is the problem with a missing config? - # Is the problem with a config that breaks things? - - # Enable all of one set and see if we get a new bad - # or good config. - - # first set the good config to the bad values. - - doprint "d=$len_diff g=$len_g b=$len_b\n"; - - # first lets enable things in bad config that are enabled in good config - - if ($len_diff > 0) { - if ($len_b > 0 || $len_g > 0) { - my %tmp_config = %bad_configs; - - doprint "Set tmp config to be bad config with good config values\n"; - foreach my $item (@diff_arr) { - $tmp_config{$item} = $good_configs{$item}; - } - - $runtest = process_new_config \%tmp_config, \%new_configs, - \%good_configs, \%bad_configs; - } + if (!length($last_result)) { + $reset = "-r"; } + run_command "$config_bisect_exec $reset -b $outputdir $good $bad $last_result", 1; - if (!$runtest && $len_diff > 0) { - - if ($len_diff == 1) { - process_failed $diff_arr[0]; - return 1; - } - my %tmp_config = %bad_configs; - - my $half = int($#diff_arr / 2); - my @tophalf = @diff_arr[0 .. $half]; - - doprint "Settings bisect with top half:\n"; - doprint "Set tmp config to be bad config with some good config values\n"; - foreach my $item (@tophalf) { - $tmp_config{$item} = $good_configs{$item}; - } - - $runtest = process_new_config \%tmp_config, \%new_configs, - \%good_configs, \%bad_configs; - - if (!$runtest) { - my %tmp_config = %bad_configs; - - doprint "Try bottom half\n"; - - my @bottomhalf = @diff_arr[$half+1 .. $#diff_arr]; - - foreach my $item (@bottomhalf) { - $tmp_config{$item} = $good_configs{$item}; - } - - $runtest = process_new_config \%tmp_config, \%new_configs, - \%good_configs, \%bad_configs; - } + # config-bisect returns: + # 0 if there is more to bisect + # 1 for finding a good config + # 2 if it can not find any more configs + # -1 (255) on error + if ($run_command_status) { + return $run_command_status; } - if ($runtest) { - $ret = run_config_bisect_test $type; - if ($ret) { - doprint "NEW GOOD CONFIG\n"; - %good_configs = %new_configs; - run_command "mv $good_config ${good_config}.last"; - save_config \%good_configs, $good_config; - %{$pgood} = %good_configs; - } else { - doprint "NEW BAD CONFIG\n"; - %bad_configs = %new_configs; - run_command "mv $bad_config ${bad_config}.last"; - save_config \%bad_configs, $bad_config; - %{$pbad} = %bad_configs; - } - return 0; + $ret = run_config_bisect_test $config_bisect_type; + if ($ret) { + doprint "NEW GOOD CONFIG\n"; + # Return 3 for good config + return 3; + } else { + doprint "NEW BAD CONFIG\n"; + # Return 4 for bad config + return 4; } - - fail "Hmm, need to do a mix match?\n"; - return -1; } sub config_bisect { my ($i) = @_; + my $good_config; + my $bad_config; + my $type = $config_bisect_type; my $ret; @@ -3353,6 +3225,24 @@ sub config_bisect { $good_config = $output_config; } + if (!defined($config_bisect_exec)) { + # First check the location that ktest.pl ran + my @locations = ( "$pwd/config-bisect.pl", + "$dirname/config-bisect.pl", + "$builddir/tools/testing/ktest/config-bisect.pl", + undef ); + foreach my $loc (@locations) { + doprint "loc = $loc\n"; + $config_bisect_exec = $loc; + last if (defined($config_bisect_exec && -x $config_bisect_exec)); + } + if (!defined($config_bisect_exec)) { + fail "Could not find an executable config-bisect.pl\n", + " Set CONFIG_BISECT_EXEC to point to config-bisect.pl"; + return 1; + } + } + # we don't want min configs to cause issues here. doprint "Disabling 'MIN_CONFIG' for this test\n"; undef $minconfig; @@ -3361,21 +3251,31 @@ sub config_bisect { my %bad_configs; my %tmp_configs; + if (-f "$tmpdir/good_config.tmp" || -f "$tmpdir/bad_config.tmp") { + if (read_yn "Interrupted config-bisect. Continue (n - will start new)?") { + if (-f "$tmpdir/good_config.tmp") { + $good_config = "$tmpdir/good_config.tmp"; + } else { + $good_config = "$tmpdir/good_config"; + } + if (-f "$tmpdir/bad_config.tmp") { + $bad_config = "$tmpdir/bad_config.tmp"; + } else { + $bad_config = "$tmpdir/bad_config"; + } + } + } doprint "Run good configs through make oldconfig\n"; assign_configs \%tmp_configs, $good_config; create_config "$good_config", \%tmp_configs; - assign_configs \%good_configs, $output_config; + $good_config = "$tmpdir/good_config"; + system("cp $output_config $good_config") == 0 or dodie "cp good config"; doprint "Run bad configs through make oldconfig\n"; assign_configs \%tmp_configs, $bad_config; create_config "$bad_config", \%tmp_configs; - assign_configs \%bad_configs, $output_config; - - $good_config = "$tmpdir/good_config"; $bad_config = "$tmpdir/bad_config"; - - save_config \%good_configs, $good_config; - save_config \%bad_configs, $bad_config; + system("cp $output_config $bad_config") == 0 or dodie "cp bad config"; if (defined($config_bisect_check) && $config_bisect_check ne "0") { if ($config_bisect_check ne "good") { @@ -3398,10 +3298,21 @@ sub config_bisect { } } + my $last_run = ""; + do { - $ret = run_config_bisect \%good_configs, \%bad_configs; + $ret = run_config_bisect $good_config, $bad_config, $last_run; + if ($ret == 3) { + $last_run = "good"; + } elsif ($ret == 4) { + $last_run = "bad"; + } print_times; - } while (!$ret); + } while ($ret == 3 || $ret == 4); + + if ($ret == 2) { + config_bisect_end "$good_config.tmp", "$bad_config.tmp"; + } return $ret if ($ret < 0); @@ -3416,9 +3327,9 @@ sub patchcheck_reboot { sub patchcheck { my ($i) = @_; - die "PATCHCHECK_START[$i] not defined\n" + dodie "PATCHCHECK_START[$i] not defined\n" if (!defined($patchcheck_start)); - die "PATCHCHECK_TYPE[$i] not defined\n" + dodie "PATCHCHECK_TYPE[$i] not defined\n" if (!defined($patchcheck_type)); my $start = $patchcheck_start; @@ -3432,7 +3343,7 @@ sub patchcheck { if (defined($patchcheck_end)) { $end = $patchcheck_end; } elsif ($cherry) { - die "PATCHCHECK_END must be defined with PATCHCHECK_CHERRY\n"; + dodie "PATCHCHECK_END must be defined with PATCHCHECK_CHERRY\n"; } # Get the true sha1's since we can use things like HEAD~3 @@ -3496,7 +3407,7 @@ sub patchcheck { doprint "\nProcessing commit \"$item\"\n\n"; run_command "git checkout $sha1" or - die "Failed to checkout $sha1"; + dodie "Failed to checkout $sha1"; # only clean on the first and last patch if ($item eq $list[0] || @@ -3587,7 +3498,7 @@ sub read_kconfig { } open(KIN, "$kconfig") - or die "Can't open $kconfig"; + or dodie "Can't open $kconfig"; while (<KIN>) { chomp; @@ -3683,8 +3594,6 @@ sub read_depends { # what directory to look at. if ($arch eq "i386" || $arch eq "x86_64") { $arch = "x86"; - } elsif ($arch =~ /^tile/) { - $arch = "tile"; } my $kconfig = "$builddir/arch/$arch/Kconfig"; @@ -3748,7 +3657,7 @@ sub get_depends { $dep =~ s/^[^$valid]*[$valid]+//; } else { - die "this should never happen"; + dodie "this should never happen"; } } @@ -4009,7 +3918,7 @@ sub make_min_config { # update new ignore configs if (defined($ignore_config)) { open (OUT, ">$temp_config") - or die "Can't write to $temp_config"; + or dodie "Can't write to $temp_config"; foreach my $config (keys %save_configs) { print OUT "$save_configs{$config}\n"; } @@ -4037,7 +3946,7 @@ sub make_min_config { # Save off all the current mandatory configs open (OUT, ">$temp_config") - or die "Can't write to $temp_config"; + or dodie "Can't write to $temp_config"; foreach my $config (keys %keep_configs) { print OUT "$keep_configs{$config}\n"; } @@ -4224,6 +4133,74 @@ sub set_test_option { return eval_option($name, $option, $i); } +sub find_mailer { + my ($mailer) = @_; + + my @paths = split /:/, $ENV{PATH}; + + # sendmail is usually in /usr/sbin + $paths[$#paths + 1] = "/usr/sbin"; + + foreach my $path (@paths) { + if (-x "$path/$mailer") { + return $path; + } + } + + return undef; +} + +sub do_send_mail { + my ($subject, $message) = @_; + + if (!defined($mail_path)) { + # find the mailer + $mail_path = find_mailer $mailer; + if (!defined($mail_path)) { + die "\nCan not find $mailer in PATH\n"; + } + } + + if (!defined($mail_command)) { + if ($mailer eq "mail" || $mailer eq "mailx") { + $mail_command = "\$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO <<< \'\$MESSAGE\'"; + } elsif ($mailer eq "sendmail" ) { + $mail_command = "echo \'Subject: \$SUBJECT\n\n\$MESSAGE\' | \$MAIL_PATH/\$MAILER -t \$MAILTO"; + } else { + die "\nYour mailer: $mailer is not supported.\n"; + } + } + + $mail_command =~ s/\$MAILER/$mailer/g; + $mail_command =~ s/\$MAIL_PATH/$mail_path/g; + $mail_command =~ s/\$MAILTO/$mailto/g; + $mail_command =~ s/\$SUBJECT/$subject/g; + $mail_command =~ s/\$MESSAGE/$message/g; + + run_command $mail_command; +} + +sub send_email { + + if (defined($mailto)) { + if (!defined($mailer)) { + doprint "No email sent: email or mailer not specified in config.\n"; + return; + } + do_send_mail @_; + } +} + +sub cancel_test { + if ($email_when_canceled) { + send_email("KTEST: Your [$test_type] test was cancelled", + "Your test started at $script_start_time was cancelled: sig int"); + } + die "\nCaught Sig Int, test interrupted: $!\n" +} + +$SIG{INT} = qw(cancel_test); + # First we need to do is the builds for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { @@ -4247,11 +4224,11 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { $outputdir = set_test_option("OUTPUT_DIR", $i); $builddir = set_test_option("BUILD_DIR", $i); - chdir $builddir || die "can't change directory to $builddir"; + chdir $builddir || dodie "can't change directory to $builddir"; if (!-d $outputdir) { mkpath($outputdir) or - die "can't create $outputdir"; + dodie "can't create $outputdir"; } $make = "$makecmd O=$outputdir"; @@ -4264,9 +4241,15 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { $start_minconfig_defined = 1; # The first test may override the PRE_KTEST option - if (defined($pre_ktest) && $i == 1) { - doprint "\n"; - run_command $pre_ktest; + if ($i == 1) { + if (defined($pre_ktest)) { + doprint "\n"; + run_command $pre_ktest; + } + if ($email_when_started) { + send_email("KTEST: Your [$test_type] test was started", + "Your test was started on $script_start_time"); + } } # Any test can override the POST_KTEST option @@ -4282,7 +4265,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { if (!-d $tmpdir) { mkpath($tmpdir) or - die "can't create $tmpdir"; + dodie "can't create $tmpdir"; } $ENV{"SSH_USER"} = $ssh_user; @@ -4355,7 +4338,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { if (defined($checkout)) { run_command "git checkout $checkout" or - die "failed to checkout $checkout"; + dodie "failed to checkout $checkout"; } $no_reboot = 0; @@ -4430,4 +4413,8 @@ if ($opt{"POWEROFF_ON_SUCCESS"}) { doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n"; +if ($email_when_finished) { + send_email("KTEST: Your [$test_type] test has finished!", + "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!"); +} exit 0; diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf index 6c58cd8bbbae..6ca6ca0ce695 100644 --- a/tools/testing/ktest/sample.conf +++ b/tools/testing/ktest/sample.conf @@ -1,6 +1,11 @@ # # Config file for ktest.pl # +# Place your customized version of this, in the working directory that +# ktest.pl is run from. By default, ktest.pl will look for a file +# called "ktest.conf", but you can name it anything you like and specify +# the name of your config file as the first argument of ktest.pl. +# # Note, all paths must be absolute # @@ -396,6 +401,44 @@ #### Optional Config Options (all have defaults) #### +# Email options for receiving notifications. Users must setup +# the specified mailer prior to using this feature. +# +# (default undefined) +#MAILTO = +# +# Supported mailers: sendmail, mail, mailx +# (default sendmail) +#MAILER = sendmail +# +# The executable to run +# (default: for sendmail "/usr/sbin/sendmail", otherwise equals ${MAILER}) +#MAIL_EXEC = /usr/sbin/sendmail +# +# The command used to send mail, which uses the above options +# can be modified. By default if the mailer is "sendmail" then +# MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO +# For mail or mailx: +# MAIL_COMMAND = "$MAIL_PATH/$MAILER -s \'$SUBJECT\' $MAILTO <<< \'$MESSAGE\' +# ktest.pl will do the substitution for MAIL_PATH, MAILER, MAILTO at the time +# it sends the mail if "$FOO" format is used. If "${FOO}" format is used, +# then the substitutions will occur at the time the config file is read. +# But note, MAIL_PATH and MAILER require being set by the config file if +# ${MAIL_PATH} or ${MAILER} are used, but not if $MAIL_PATH or $MAILER are. +#MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO +# +# Errors are defined as those would terminate the script +# (default 1) +#EMAIL_ON_ERROR = 1 +# (default 1) +#EMAIL_WHEN_FINISHED = 1 +# (default 0) +#EMAIL_WHEN_STARTED = 1 +# +# Users can cancel the test by Ctrl^C +# (default 0) +#EMAIL_WHEN_CANCELED = 1 + # Start a test setup. If you leave this off, all options # will be default and the test will run once. # This is a label and not really an option (it takes no value). @@ -725,6 +768,13 @@ # (default 120) #TIMEOUT = 120 +# The timeout in seconds when to test if the box can be rebooted +# or not. Before issuing the reboot command, a ssh connection +# is attempted to see if the target machine is still active. +# If the target does not connect within this timeout, a power cycle +# is issued instead of a reboot. +# CONNECT_TIMEOUT = 25 + # In between tests, a reboot of the box may occur, and this # is the time to wait for the console after it stops producing # output. Some machines may not produce a large lag on reboot @@ -1167,6 +1217,16 @@ # Set it to "good" to test only the good config and set it # to "bad" to only test the bad config. # +# CONFIG_BISECT_EXEC (optional) +# The config bisect is a separate program that comes with ktest.pl. +# By befault, it will look for: +# `pwd`/config-bisect.pl # the location ktest.pl was executed from. +# If it does not find it there, it will look for: +# `dirname <ktest.pl>`/config-bisect.pl # The directory that holds ktest.pl +# If it does not find it there, it will look for: +# ${BUILD_DIR}/tools/testing/ktest/config-bisect.pl +# Setting CONFIG_BISECT_EXEC will override where it looks. +# # Example: # TEST_START # TEST_TYPE = config_bisect diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index db33b28c5ef3..0392153a0009 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -37,10 +37,12 @@ obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o nfit-y := $(ACPI_SRC)/core.o nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o +nfit-y += acpi_nfit_test.o nfit-y += config_check.o nd_pmem-y := $(NVDIMM_SRC)/pmem.o nd_pmem-y += pmem-dax.o +nd_pmem-y += pmem_test.o nd_pmem-y += config_check.o nd_btt-y := $(NVDIMM_SRC)/btt.o @@ -57,6 +59,7 @@ dax-y += config_check.o device_dax-y := $(DAX_SRC)/device.o device_dax-y += dax-dev.o +device_dax-y += device_dax_test.o device_dax-y += config_check.o dax_pmem-y := $(DAX_SRC)/pmem.o @@ -75,6 +78,7 @@ libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o +libnvdimm-y += libnvdimm_test.o libnvdimm-y += config_check.o obj-m += test/ diff --git a/tools/testing/nvdimm/acpi_nfit_test.c b/tools/testing/nvdimm/acpi_nfit_test.c new file mode 100644 index 000000000000..43521512e577 --- /dev/null +++ b/tools/testing/nvdimm/acpi_nfit_test.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#include <linux/module.h> +#include <linux/printk.h> +#include "watermark.h" + +nfit_test_watermark(acpi_nfit); diff --git a/tools/testing/nvdimm/device_dax_test.c b/tools/testing/nvdimm/device_dax_test.c new file mode 100644 index 000000000000..24b17bf42429 --- /dev/null +++ b/tools/testing/nvdimm/device_dax_test.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#include <linux/module.h> +#include <linux/printk.h> +#include "watermark.h" + +nfit_test_watermark(device_dax); diff --git a/tools/testing/nvdimm/libnvdimm_test.c b/tools/testing/nvdimm/libnvdimm_test.c new file mode 100644 index 000000000000..00ca30b23932 --- /dev/null +++ b/tools/testing/nvdimm/libnvdimm_test.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#include <linux/module.h> +#include <linux/printk.h> +#include "watermark.h" + +nfit_test_watermark(libnvdimm); diff --git a/tools/testing/nvdimm/pmem_test.c b/tools/testing/nvdimm/pmem_test.c new file mode 100644 index 000000000000..fd38f92275cf --- /dev/null +++ b/tools/testing/nvdimm/pmem_test.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. + +#include <linux/module.h> +#include <linux/printk.h> +#include "watermark.h" + +nfit_test_watermark(pmem); diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index e1f75a1914a1..ff9d3a5825e1 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -104,15 +104,14 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset, } EXPORT_SYMBOL(__wrap_devm_memremap); -void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res, - struct percpu_ref *ref, struct vmem_altmap *altmap) +void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { - resource_size_t offset = res->start; + resource_size_t offset = pgmap->res.start; struct nfit_test_resource *nfit_res = get_nfit_res(offset); if (nfit_res) return nfit_res->buf + offset - nfit_res->res.start; - return devm_memremap_pages(dev, res, ref, altmap); + return devm_memremap_pages(dev, pgmap); } EXPORT_SYMBOL(__wrap_devm_memremap_pages); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 7217b2b953b5..cb166be4918d 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -27,6 +27,7 @@ #include <nfit.h> #include <nd.h> #include "nfit_test.h" +#include "../watermark.h" /* * Generate an NFIT table to describe the following topology: @@ -103,7 +104,8 @@ enum { NUM_HINTS = 8, NUM_BDW = NUM_DCR, NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW, - NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */, + NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + + 4 /* spa1 iset */ + 1 /* spa11 iset */, DIMM_SIZE = SZ_32M, LABEL_SIZE = SZ_128K, SPA_VCD_SIZE = SZ_4M, @@ -137,6 +139,14 @@ static u32 handle[] = { static unsigned long dimm_fail_cmd_flags[NUM_DCR]; +struct nfit_test_fw { + enum intel_fw_update_state state; + u32 context; + u64 version; + u32 size_received; + u64 end_time; +}; + struct nfit_test { struct acpi_nfit_desc acpi_desc; struct platform_device pdev; @@ -144,6 +154,7 @@ struct nfit_test { void *nfit_buf; dma_addr_t nfit_dma; size_t nfit_size; + size_t nfit_filled; int dcr_idx; int num_dcr; int num_pm; @@ -168,8 +179,11 @@ struct nfit_test { spinlock_t lock; } ars_state; struct device *dimm_dev[NUM_DCR]; + struct nd_intel_smart *smart; + struct nd_intel_smart_threshold *smart_threshold; struct badrange badrange; struct work_struct work; + struct nfit_test_fw *fw; }; static struct workqueue_struct *nfit_wq; @@ -181,6 +195,226 @@ static struct nfit_test *to_nfit_test(struct device *dev) return container_of(pdev, struct nfit_test, pdev); } +static int nd_intel_test_get_fw_info(struct nfit_test *t, + struct nd_intel_fw_info *nd_cmd, unsigned int buf_len, + int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p, buf_len: %u, idx: %d\n", + __func__, t, nd_cmd, buf_len, idx); + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + nd_cmd->status = 0; + nd_cmd->storage_size = INTEL_FW_STORAGE_SIZE; + nd_cmd->max_send_len = INTEL_FW_MAX_SEND_LEN; + nd_cmd->query_interval = INTEL_FW_QUERY_INTERVAL; + nd_cmd->max_query_time = INTEL_FW_QUERY_MAX_TIME; + nd_cmd->update_cap = 0; + nd_cmd->fis_version = INTEL_FW_FIS_VERSION; + nd_cmd->run_version = 0; + nd_cmd->updated_version = fw->version; + + return 0; +} + +static int nd_intel_test_start_update(struct nfit_test *t, + struct nd_intel_fw_start *nd_cmd, unsigned int buf_len, + int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", + __func__, t, nd_cmd, buf_len, idx); + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + if (fw->state != FW_STATE_NEW) { + /* extended status, FW update in progress */ + nd_cmd->status = 0x10007; + return 0; + } + + fw->state = FW_STATE_IN_PROGRESS; + fw->context++; + fw->size_received = 0; + nd_cmd->status = 0; + nd_cmd->context = fw->context; + + dev_dbg(dev, "%s: context issued: %#x\n", __func__, nd_cmd->context); + + return 0; +} + +static int nd_intel_test_send_data(struct nfit_test *t, + struct nd_intel_fw_send_data *nd_cmd, unsigned int buf_len, + int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + u32 *status = (u32 *)&nd_cmd->data[nd_cmd->length]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", + __func__, t, nd_cmd, buf_len, idx); + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + + dev_dbg(dev, "%s: cmd->status: %#x\n", __func__, *status); + dev_dbg(dev, "%s: cmd->data[0]: %#x\n", __func__, nd_cmd->data[0]); + dev_dbg(dev, "%s: cmd->data[%u]: %#x\n", __func__, nd_cmd->length-1, + nd_cmd->data[nd_cmd->length-1]); + + if (fw->state != FW_STATE_IN_PROGRESS) { + dev_dbg(dev, "%s: not in IN_PROGRESS state\n", __func__); + *status = 0x5; + return 0; + } + + if (nd_cmd->context != fw->context) { + dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n", + __func__, nd_cmd->context, fw->context); + *status = 0x10007; + return 0; + } + + /* + * check offset + len > size of fw storage + * check length is > max send length + */ + if (nd_cmd->offset + nd_cmd->length > INTEL_FW_STORAGE_SIZE || + nd_cmd->length > INTEL_FW_MAX_SEND_LEN) { + *status = 0x3; + dev_dbg(dev, "%s: buffer boundary violation\n", __func__); + return 0; + } + + fw->size_received += nd_cmd->length; + dev_dbg(dev, "%s: copying %u bytes, %u bytes so far\n", + __func__, nd_cmd->length, fw->size_received); + *status = 0; + return 0; +} + +static int nd_intel_test_finish_fw(struct nfit_test *t, + struct nd_intel_fw_finish_update *nd_cmd, + unsigned int buf_len, int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", + __func__, t, nd_cmd, buf_len, idx); + + if (fw->state == FW_STATE_UPDATED) { + /* update already done, need cold boot */ + nd_cmd->status = 0x20007; + return 0; + } + + dev_dbg(dev, "%s: context: %#x ctrl_flags: %#x\n", + __func__, nd_cmd->context, nd_cmd->ctrl_flags); + + switch (nd_cmd->ctrl_flags) { + case 0: /* finish */ + if (nd_cmd->context != fw->context) { + dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n", + __func__, nd_cmd->context, + fw->context); + nd_cmd->status = 0x10007; + return 0; + } + nd_cmd->status = 0; + fw->state = FW_STATE_VERIFY; + /* set 1 second of time for firmware "update" */ + fw->end_time = jiffies + HZ; + break; + + case 1: /* abort */ + fw->size_received = 0; + /* successfully aborted status */ + nd_cmd->status = 0x40007; + fw->state = FW_STATE_NEW; + dev_dbg(dev, "%s: abort successful\n", __func__); + break; + + default: /* bad control flag */ + dev_warn(dev, "%s: unknown control flag: %#x\n", + __func__, nd_cmd->ctrl_flags); + return -EINVAL; + } + + return 0; +} + +static int nd_intel_test_finish_query(struct nfit_test *t, + struct nd_intel_fw_finish_query *nd_cmd, + unsigned int buf_len, int idx) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_fw *fw = &t->fw[idx]; + + dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n", + __func__, t, nd_cmd, buf_len, idx); + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + if (nd_cmd->context != fw->context) { + dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n", + __func__, nd_cmd->context, fw->context); + nd_cmd->status = 0x10007; + return 0; + } + + dev_dbg(dev, "%s context: %#x\n", __func__, nd_cmd->context); + + switch (fw->state) { + case FW_STATE_NEW: + nd_cmd->updated_fw_rev = 0; + nd_cmd->status = 0; + dev_dbg(dev, "%s: new state\n", __func__); + break; + + case FW_STATE_IN_PROGRESS: + /* sequencing error */ + nd_cmd->status = 0x40007; + nd_cmd->updated_fw_rev = 0; + dev_dbg(dev, "%s: sequence error\n", __func__); + break; + + case FW_STATE_VERIFY: + if (time_is_after_jiffies64(fw->end_time)) { + nd_cmd->updated_fw_rev = 0; + nd_cmd->status = 0x20007; + dev_dbg(dev, "%s: still verifying\n", __func__); + break; + } + + dev_dbg(dev, "%s: transition out verify\n", __func__); + fw->state = FW_STATE_UPDATED; + /* we are going to fall through if it's "done" */ + case FW_STATE_UPDATED: + nd_cmd->status = 0; + /* bogus test version */ + fw->version = nd_cmd->updated_fw_rev = + INTEL_FW_FAKE_VERSION; + dev_dbg(dev, "%s: updated\n", __func__); + break; + + default: /* we should never get here */ + return -EINVAL; + } + + return 0; +} + static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd, unsigned int buf_len) { @@ -440,39 +674,94 @@ static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus, return 0; } -static int nfit_test_cmd_smart(struct nd_cmd_smart *smart, unsigned int buf_len) +static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len, + struct nd_intel_smart *smart_data) { - static const struct nd_smart_payload smart_data = { - .flags = ND_SMART_HEALTH_VALID | ND_SMART_TEMP_VALID - | ND_SMART_SPARES_VALID | ND_SMART_ALARM_VALID - | ND_SMART_USED_VALID | ND_SMART_SHUTDOWN_VALID, - .health = ND_SMART_NON_CRITICAL_HEALTH, - .temperature = 23 * 16, - .spares = 75, - .alarm_flags = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP, - .life_used = 5, - .shutdown_state = 0, - .vendor_size = 0, - }; - if (buf_len < sizeof(*smart)) return -EINVAL; - memcpy(smart->data, &smart_data, sizeof(smart_data)); + memcpy(smart, smart_data, sizeof(*smart)); return 0; } -static int nfit_test_cmd_smart_threshold(struct nd_cmd_smart_threshold *smart_t, - unsigned int buf_len) +static int nfit_test_cmd_smart_threshold( + struct nd_intel_smart_threshold *out, + unsigned int buf_len, + struct nd_intel_smart_threshold *smart_t) { - static const struct nd_smart_threshold_payload smart_t_data = { - .alarm_control = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP, - .temperature = 40 * 16, - .spares = 5, - }; - if (buf_len < sizeof(*smart_t)) return -EINVAL; - memcpy(smart_t->data, &smart_t_data, sizeof(smart_t_data)); + memcpy(out, smart_t, sizeof(*smart_t)); + return 0; +} + +static void smart_notify(struct device *bus_dev, + struct device *dimm_dev, struct nd_intel_smart *smart, + struct nd_intel_smart_threshold *thresh) +{ + dev_dbg(dimm_dev, "%s: alarm: %#x spares: %d (%d) mtemp: %d (%d) ctemp: %d (%d)\n", + __func__, thresh->alarm_control, thresh->spares, + smart->spares, thresh->media_temperature, + smart->media_temperature, thresh->ctrl_temperature, + smart->ctrl_temperature); + if (((thresh->alarm_control & ND_INTEL_SMART_SPARE_TRIP) + && smart->spares + <= thresh->spares) + || ((thresh->alarm_control & ND_INTEL_SMART_TEMP_TRIP) + && smart->media_temperature + >= thresh->media_temperature) + || ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP) + && smart->ctrl_temperature + >= thresh->ctrl_temperature) + || (smart->health != ND_INTEL_SMART_NON_CRITICAL_HEALTH) + || (smart->shutdown_state != 0)) { + device_lock(bus_dev); + __acpi_nvdimm_notify(dimm_dev, 0x81); + device_unlock(bus_dev); + } +} + +static int nfit_test_cmd_smart_set_threshold( + struct nd_intel_smart_set_threshold *in, + unsigned int buf_len, + struct nd_intel_smart_threshold *thresh, + struct nd_intel_smart *smart, + struct device *bus_dev, struct device *dimm_dev) +{ + unsigned int size; + + size = sizeof(*in) - 4; + if (buf_len < size) + return -EINVAL; + memcpy(thresh->data, in, size); + in->status = 0; + smart_notify(bus_dev, dimm_dev, smart, thresh); + + return 0; +} + +static int nfit_test_cmd_smart_inject( + struct nd_intel_smart_inject *inj, + unsigned int buf_len, + struct nd_intel_smart_threshold *thresh, + struct nd_intel_smart *smart, + struct device *bus_dev, struct device *dimm_dev) +{ + if (buf_len != sizeof(*inj)) + return -EINVAL; + + if (inj->mtemp_enable) + smart->media_temperature = inj->media_temperature; + if (inj->spare_enable) + smart->spares = inj->spares; + if (inj->fatal_enable) + smart->health = ND_INTEL_SMART_FATAL_HEALTH; + if (inj->unsafe_shutdown_enable) { + smart->shutdown_state = 1; + smart->shutdown_count++; + } + inj->status = 0; + smart_notify(bus_dev, dimm_dev, smart, thresh); + return 0; } @@ -563,6 +852,52 @@ static int nfit_test_cmd_ars_inject_status(struct nfit_test *t, return 0; } +static int nd_intel_test_cmd_set_lss_status(struct nfit_test *t, + struct nd_intel_lss *nd_cmd, unsigned int buf_len) +{ + struct device *dev = &t->pdev.dev; + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + switch (nd_cmd->enable) { + case 0: + nd_cmd->status = 0; + dev_dbg(dev, "%s: Latch System Shutdown Status disabled\n", + __func__); + break; + case 1: + nd_cmd->status = 0; + dev_dbg(dev, "%s: Latch System Shutdown Status enabled\n", + __func__); + break; + default: + dev_warn(dev, "Unknown enable value: %#x\n", nd_cmd->enable); + nd_cmd->status = 0x3; + break; + } + + + return 0; +} + +static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) +{ + int i; + + /* lookup per-dimm data */ + for (i = 0; i < ARRAY_SIZE(handle); i++) + if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i]) + break; + if (i >= ARRAY_SIZE(handle)) + return -ENXIO; + + if ((1 << func) & dimm_fail_cmd_flags[i]) + return -EIO; + + return i; +} + static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) @@ -591,22 +926,64 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, func = call_pkg->nd_command; if (call_pkg->nd_family != nfit_mem->family) return -ENOTTY; + + i = get_dimm(nfit_mem, func); + if (i < 0) + return i; + + switch (func) { + case ND_INTEL_ENABLE_LSS_STATUS: + return nd_intel_test_cmd_set_lss_status(t, + buf, buf_len); + case ND_INTEL_FW_GET_INFO: + return nd_intel_test_get_fw_info(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_FW_START_UPDATE: + return nd_intel_test_start_update(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_FW_SEND_DATA: + return nd_intel_test_send_data(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_FW_FINISH_UPDATE: + return nd_intel_test_finish_fw(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_FW_FINISH_QUERY: + return nd_intel_test_finish_query(t, buf, + buf_len, i - t->dcr_idx); + case ND_INTEL_SMART: + return nfit_test_cmd_smart(buf, buf_len, + &t->smart[i - t->dcr_idx]); + case ND_INTEL_SMART_THRESHOLD: + return nfit_test_cmd_smart_threshold(buf, + buf_len, + &t->smart_threshold[i - + t->dcr_idx]); + case ND_INTEL_SMART_SET_THRESHOLD: + return nfit_test_cmd_smart_set_threshold(buf, + buf_len, + &t->smart_threshold[i - + t->dcr_idx], + &t->smart[i - t->dcr_idx], + &t->pdev.dev, t->dimm_dev[i]); + case ND_INTEL_SMART_INJECT: + return nfit_test_cmd_smart_inject(buf, + buf_len, + &t->smart_threshold[i - + t->dcr_idx], + &t->smart[i - t->dcr_idx], + &t->pdev.dev, t->dimm_dev[i]); + default: + return -ENOTTY; + } } if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &nfit_mem->dsm_mask)) return -ENOTTY; - /* lookup label space for the given dimm */ - for (i = 0; i < ARRAY_SIZE(handle); i++) - if (__to_nfit_memdev(nfit_mem)->device_handle == - handle[i]) - break; - if (i >= ARRAY_SIZE(handle)) - return -ENXIO; - - if ((1 << func) & dimm_fail_cmd_flags[i]) - return -EIO; + i = get_dimm(nfit_mem, func); + if (i < 0) + return i; switch (func) { case ND_CMD_GET_CONFIG_SIZE: @@ -620,15 +997,6 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, rc = nfit_test_cmd_set_config_data(buf, buf_len, t->label[i - t->dcr_idx]); break; - case ND_CMD_SMART: - rc = nfit_test_cmd_smart(buf, buf_len); - break; - case ND_CMD_SMART_THRESHOLD: - rc = nfit_test_cmd_smart_threshold(buf, buf_len); - device_lock(&t->pdev.dev); - __acpi_nvdimm_notify(t->dimm_dev[i], 0x81); - device_unlock(&t->pdev.dev); - break; default: return -ENOTTY; } @@ -872,6 +1240,44 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = { NULL, }; +static void smart_init(struct nfit_test *t) +{ + int i; + const struct nd_intel_smart_threshold smart_t_data = { + .alarm_control = ND_INTEL_SMART_SPARE_TRIP + | ND_INTEL_SMART_TEMP_TRIP, + .media_temperature = 40 * 16, + .ctrl_temperature = 30 * 16, + .spares = 5, + }; + const struct nd_intel_smart smart_data = { + .flags = ND_INTEL_SMART_HEALTH_VALID + | ND_INTEL_SMART_SPARES_VALID + | ND_INTEL_SMART_ALARM_VALID + | ND_INTEL_SMART_USED_VALID + | ND_INTEL_SMART_SHUTDOWN_VALID + | ND_INTEL_SMART_MTEMP_VALID, + .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH, + .media_temperature = 23 * 16, + .ctrl_temperature = 25 * 16, + .pmic_temperature = 40 * 16, + .spares = 75, + .alarm_flags = ND_INTEL_SMART_SPARE_TRIP + | ND_INTEL_SMART_TEMP_TRIP, + .ait_status = 1, + .life_used = 5, + .shutdown_state = 0, + .vendor_size = 0, + .shutdown_count = 100, + }; + + for (i = 0; i < t->num_dcr; i++) { + memcpy(&t->smart[i], &smart_data, sizeof(smart_data)); + memcpy(&t->smart_threshold[i], &smart_t_data, + sizeof(smart_t_data)); + } +} + static int nfit_test0_alloc(struct nfit_test *t) { size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA @@ -881,7 +1287,8 @@ static int nfit_test0_alloc(struct nfit_test *t) window_size) * NUM_DCR + sizeof(struct acpi_nfit_data_region) * NUM_BDW + (sizeof(struct acpi_nfit_flush_address) - + sizeof(u64) * NUM_HINTS) * NUM_DCR; + + sizeof(u64) * NUM_HINTS) * NUM_DCR + + sizeof(struct acpi_nfit_capabilities); int i; t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); @@ -939,6 +1346,7 @@ static int nfit_test0_alloc(struct nfit_test *t) return -ENOMEM; } + smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -969,6 +1377,7 @@ static int nfit_test1_alloc(struct nfit_test *t) if (!t->spa_set[1]) return -ENOMEM; + smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -993,7 +1402,8 @@ static void nfit_test0_setup(struct nfit_test *t) struct acpi_nfit_control_region *dcr; struct acpi_nfit_data_region *bdw; struct acpi_nfit_flush_address *flush; - unsigned int offset, i; + struct acpi_nfit_capabilities *pcap; + unsigned int offset = 0, i; /* * spa0 (interleave first half of dimm0 and dimm1, note storage @@ -1007,93 +1417,102 @@ static void nfit_test0_setup(struct nfit_test *t) spa->range_index = 0+1; spa->address = t->spa_set_dma[0]; spa->length = SPA0_SIZE; + offset += spa->header.length; /* * spa1 (interleave last half of the 4 DIMMS, note storage * does not actually alias the related block-data-window * regions) */ - spa = nfit_buf + sizeof(*spa); + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); spa->range_index = 1+1; spa->address = t->spa_set_dma[1]; spa->length = SPA1_SIZE; + offset += spa->header.length; /* spa2 (dcr0) dimm0 */ - spa = nfit_buf + sizeof(*spa) * 2; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 2+1; spa->address = t->dcr_dma[0]; spa->length = DCR_SIZE; + offset += spa->header.length; /* spa3 (dcr1) dimm1 */ - spa = nfit_buf + sizeof(*spa) * 3; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 3+1; spa->address = t->dcr_dma[1]; spa->length = DCR_SIZE; + offset += spa->header.length; /* spa4 (dcr2) dimm2 */ - spa = nfit_buf + sizeof(*spa) * 4; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 4+1; spa->address = t->dcr_dma[2]; spa->length = DCR_SIZE; + offset += spa->header.length; /* spa5 (dcr3) dimm3 */ - spa = nfit_buf + sizeof(*spa) * 5; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 5+1; spa->address = t->dcr_dma[3]; spa->length = DCR_SIZE; + offset += spa->header.length; /* spa6 (bdw for dcr0) dimm0 */ - spa = nfit_buf + sizeof(*spa) * 6; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 6+1; spa->address = t->dimm_dma[0]; spa->length = DIMM_SIZE; + offset += spa->header.length; /* spa7 (bdw for dcr1) dimm1 */ - spa = nfit_buf + sizeof(*spa) * 7; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 7+1; spa->address = t->dimm_dma[1]; spa->length = DIMM_SIZE; + offset += spa->header.length; /* spa8 (bdw for dcr2) dimm2 */ - spa = nfit_buf + sizeof(*spa) * 8; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 8+1; spa->address = t->dimm_dma[2]; spa->length = DIMM_SIZE; + offset += spa->header.length; /* spa9 (bdw for dcr3) dimm3 */ - spa = nfit_buf + sizeof(*spa) * 9; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 9+1; spa->address = t->dimm_dma[3]; spa->length = DIMM_SIZE; + offset += spa->header.length; - offset = sizeof(*spa) * 10; /* mem-region0 (spa0, dimm0) */ memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; @@ -1108,9 +1527,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 2; + offset += memdev->header.length; /* mem-region1 (spa0, dimm1) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map); + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[1]; @@ -1124,9 +1544,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 2; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; /* mem-region2 (spa1, dimm0) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[0]; @@ -1140,9 +1561,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 4; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; /* mem-region3 (spa1, dimm1) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[1]; @@ -1155,9 +1577,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; + offset += memdev->header.length; /* mem-region4 (spa1, dimm2) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 4; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[2]; @@ -1171,9 +1594,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 4; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; /* mem-region5 (spa1, dimm3) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[3]; @@ -1186,9 +1610,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; + offset += memdev->header.length; /* mem-region6 (spa/dcr0, dimm0) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 6; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[0]; @@ -1201,9 +1626,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region7 (spa/dcr1, dimm1) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 7; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[1]; @@ -1216,9 +1642,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region8 (spa/dcr2, dimm2) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 8; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[2]; @@ -1231,9 +1658,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region9 (spa/dcr3, dimm3) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 9; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[3]; @@ -1246,9 +1674,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region10 (spa/bdw0, dimm0) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 10; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[0]; @@ -1261,9 +1690,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region11 (spa/bdw1, dimm1) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 11; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[1]; @@ -1276,9 +1706,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region12 (spa/bdw2, dimm2) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 12; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[2]; @@ -1291,9 +1722,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region13 (spa/dcr3, dimm3) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 13; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[3]; @@ -1307,12 +1739,12 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 1; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; - offset = offset + sizeof(struct acpi_nfit_memory_map) * 14; /* dcr-descriptor0: blk */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 0+1; dcr_common_init(dcr); dcr->serial_number = ~handle[0]; @@ -1323,11 +1755,12 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; /* dcr-descriptor1: blk */ - dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region); + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 1+1; dcr_common_init(dcr); dcr->serial_number = ~handle[1]; @@ -1338,11 +1771,12 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; /* dcr-descriptor2: blk */ - dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2; + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 2+1; dcr_common_init(dcr); dcr->serial_number = ~handle[2]; @@ -1353,11 +1787,12 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; /* dcr-descriptor3: blk */ - dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 3; + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 3+1; dcr_common_init(dcr); dcr->serial_number = ~handle[3]; @@ -1368,8 +1803,8 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; - offset = offset + sizeof(struct acpi_nfit_control_region) * 4; /* dcr-descriptor0: pmem */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; @@ -1380,10 +1815,10 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[0]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; /* dcr-descriptor1: pmem */ - dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, - window_size); + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); @@ -1392,10 +1827,10 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[1]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; /* dcr-descriptor2: pmem */ - dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, - window_size) * 2; + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); @@ -1404,10 +1839,10 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[2]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; /* dcr-descriptor3: pmem */ - dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, - window_size) * 3; + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); @@ -1416,54 +1851,56 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[3]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; - offset = offset + offsetof(struct acpi_nfit_control_region, - window_size) * 4; /* bdw0 (spa/dcr0, dimm0) */ bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 0+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; /* bdw1 (spa/dcr1, dimm1) */ - bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region); + bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 1+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; /* bdw2 (spa/dcr2, dimm2) */ - bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 2; + bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 2+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; /* bdw3 (spa/dcr3, dimm3) */ - bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 3; + bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 3+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; - offset = offset + sizeof(struct acpi_nfit_data_region) * 4; /* flush0 (dimm0) */ flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; @@ -1472,40 +1909,52 @@ static void nfit_test0_setup(struct nfit_test *t) flush->hint_count = NUM_HINTS; for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64); + offset += flush->header.length; /* flush1 (dimm1) */ - flush = nfit_buf + offset + flush_hint_size * 1; + flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; flush->header.length = flush_hint_size; flush->device_handle = handle[1]; flush->hint_count = NUM_HINTS; for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64); + offset += flush->header.length; /* flush2 (dimm2) */ - flush = nfit_buf + offset + flush_hint_size * 2; + flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; flush->header.length = flush_hint_size; flush->device_handle = handle[2]; flush->hint_count = NUM_HINTS; for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64); + offset += flush->header.length; /* flush3 (dimm3) */ - flush = nfit_buf + offset + flush_hint_size * 3; + flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; flush->header.length = flush_hint_size; flush->device_handle = handle[3]; flush->hint_count = NUM_HINTS; for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64); + offset += flush->header.length; + + /* platform capabilities */ + pcap = nfit_buf + offset; + pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES; + pcap->header.length = sizeof(*pcap); + pcap->highest_capability = 1; + pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH | + ACPI_NFIT_CAPABILITY_MEM_FLUSH; + offset += pcap->header.length; if (t->setup_hotplug) { - offset = offset + flush_hint_size * 4; /* dcr-descriptor4: blk */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 8+1; dcr_common_init(dcr); dcr->serial_number = ~handle[4]; @@ -1516,8 +1965,8 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; - offset = offset + sizeof(struct acpi_nfit_control_region); /* dcr-descriptor4: pmem */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; @@ -1528,21 +1977,20 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[4]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; - offset = offset + offsetof(struct acpi_nfit_control_region, - window_size); /* bdw4 (spa/dcr4, dimm4) */ bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 8+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; - offset = offset + sizeof(struct acpi_nfit_data_region); /* spa10 (dcr4) dimm4 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; @@ -1551,30 +1999,32 @@ static void nfit_test0_setup(struct nfit_test *t) spa->range_index = 10+1; spa->address = t->dcr_dma[4]; spa->length = DCR_SIZE; + offset += spa->header.length; /* * spa11 (single-dimm interleave for hotplug, note storage * does not actually alias the related block-data-window * regions) */ - spa = nfit_buf + offset + sizeof(*spa); + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); spa->range_index = 11+1; spa->address = t->spa_set_dma[2]; spa->length = SPA0_SIZE; + offset += spa->header.length; /* spa12 (bdw for dcr4) dimm4 */ - spa = nfit_buf + offset + sizeof(*spa) * 2; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 12+1; spa->address = t->dimm_dma[4]; spa->length = DIMM_SIZE; + offset += spa->header.length; - offset = offset + sizeof(*spa) * 3; /* mem-region14 (spa/dcr4, dimm4) */ memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; @@ -1589,10 +2039,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; - /* mem-region15 (spa0, dimm4) */ - memdev = nfit_buf + offset + - sizeof(struct acpi_nfit_memory_map); + /* mem-region15 (spa11, dimm4) */ + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[4]; @@ -1606,10 +2056,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 1; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; /* mem-region16 (spa/bdw4, dimm4) */ - memdev = nfit_buf + offset + - sizeof(struct acpi_nfit_memory_map) * 2; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[4]; @@ -1622,8 +2072,8 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; - offset = offset + sizeof(struct acpi_nfit_memory_map) * 3; /* flush3 (dimm4) */ flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; @@ -1633,8 +2083,14 @@ static void nfit_test0_setup(struct nfit_test *t) for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[4] + i * sizeof(u64); + offset += flush->header.length; + + /* sanity check to make sure we've filled the buffer */ + WARN_ON(offset != t->nfit_size); } + t->nfit_filled = offset; + post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0], SPA0_SIZE); @@ -1642,17 +2098,25 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); - set_bit(ND_CMD_SMART, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_SMART_INJECT, &acpi_desc->dimm_cmd_force_en); set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en); - set_bit(ND_CMD_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en); set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en); set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en); set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en); set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en); + set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_FW_FINISH_UPDATE, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_FW_FINISH_QUERY, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en); } static void nfit_test1_setup(struct nfit_test *t) @@ -1673,17 +2137,18 @@ static void nfit_test1_setup(struct nfit_test *t) spa->range_index = 0+1; spa->address = t->spa_set_dma[0]; spa->length = SPA2_SIZE; + offset += spa->header.length; /* virtual cd region */ - spa = nfit_buf + sizeof(*spa); + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16); spa->range_index = 0; spa->address = t->spa_set_dma[1]; spa->length = SPA_VCD_SIZE; + offset += spa->header.length; - offset += sizeof(*spa) * 2; /* mem-region0 (spa0, dimm0) */ memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; @@ -1701,8 +2166,8 @@ static void nfit_test1_setup(struct nfit_test *t) memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED | ACPI_NFIT_MEM_NOT_ARMED; + offset += memdev->header.length; - offset += sizeof(*memdev); /* dcr-descriptor0 */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; @@ -1713,8 +2178,8 @@ static void nfit_test1_setup(struct nfit_test *t) dcr->serial_number = ~handle[5]; dcr->code = NFIT_FIC_BYTE; dcr->windows = 0; - offset += dcr->header.length; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); @@ -1729,9 +2194,9 @@ static void nfit_test1_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 1; memdev->flags = ACPI_NFIT_MEM_MAP_FAILED; + offset += memdev->header.length; /* dcr-descriptor1 */ - offset += sizeof(*memdev); dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = offsetof(struct acpi_nfit_control_region, @@ -1741,6 +2206,12 @@ static void nfit_test1_setup(struct nfit_test *t) dcr->serial_number = ~handle[6]; dcr->code = NFIT_FIC_BYTE; dcr->windows = 0; + offset += dcr->header.length; + + /* sanity check to make sure we've filled the buffer */ + WARN_ON(offset != t->nfit_size); + + t->nfit_filled = offset; post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0], SPA2_SIZE); @@ -1750,6 +2221,7 @@ static void nfit_test1_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); + set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, @@ -2054,10 +2526,18 @@ static int nfit_test_probe(struct platform_device *pdev) sizeof(struct nfit_test_dcr *), GFP_KERNEL); nfit_test->dcr_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), GFP_KERNEL); + nfit_test->smart = devm_kcalloc(dev, num, + sizeof(struct nd_intel_smart), GFP_KERNEL); + nfit_test->smart_threshold = devm_kcalloc(dev, num, + sizeof(struct nd_intel_smart_threshold), + GFP_KERNEL); + nfit_test->fw = devm_kcalloc(dev, num, + sizeof(struct nfit_test_fw), GFP_KERNEL); if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label && nfit_test->label_dma && nfit_test->dcr && nfit_test->dcr_dma && nfit_test->flush - && nfit_test->flush_dma) + && nfit_test->flush_dma + && nfit_test->fw) /* pass */; else return -ENOMEM; @@ -2090,7 +2570,7 @@ static int nfit_test_probe(struct platform_device *pdev) nd_desc->ndctl = nfit_test_ctl; rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf, - nfit_test->nfit_size); + nfit_test->nfit_filled); if (rc) return rc; @@ -2159,6 +2639,11 @@ static __init int nfit_test_init(void) { int rc, i; + pmem_test(); + libnvdimm_test(); + acpi_nfit_test(); + device_dax_test(); + nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm); nfit_wq = create_singlethread_workqueue("nfit"); diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index 113b44675a71..33752e06ff8d 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -84,6 +84,156 @@ struct nd_cmd_ars_err_inj_stat { } __packed record[0]; } __packed; +#define ND_INTEL_SMART 1 +#define ND_INTEL_SMART_THRESHOLD 2 +#define ND_INTEL_ENABLE_LSS_STATUS 10 +#define ND_INTEL_FW_GET_INFO 12 +#define ND_INTEL_FW_START_UPDATE 13 +#define ND_INTEL_FW_SEND_DATA 14 +#define ND_INTEL_FW_FINISH_UPDATE 15 +#define ND_INTEL_FW_FINISH_QUERY 16 +#define ND_INTEL_SMART_SET_THRESHOLD 17 +#define ND_INTEL_SMART_INJECT 18 + +#define ND_INTEL_SMART_HEALTH_VALID (1 << 0) +#define ND_INTEL_SMART_SPARES_VALID (1 << 1) +#define ND_INTEL_SMART_USED_VALID (1 << 2) +#define ND_INTEL_SMART_MTEMP_VALID (1 << 3) +#define ND_INTEL_SMART_CTEMP_VALID (1 << 4) +#define ND_INTEL_SMART_SHUTDOWN_COUNT_VALID (1 << 5) +#define ND_INTEL_SMART_AIT_STATUS_VALID (1 << 6) +#define ND_INTEL_SMART_PTEMP_VALID (1 << 7) +#define ND_INTEL_SMART_ALARM_VALID (1 << 9) +#define ND_INTEL_SMART_SHUTDOWN_VALID (1 << 10) +#define ND_INTEL_SMART_VENDOR_VALID (1 << 11) +#define ND_INTEL_SMART_SPARE_TRIP (1 << 0) +#define ND_INTEL_SMART_TEMP_TRIP (1 << 1) +#define ND_INTEL_SMART_CTEMP_TRIP (1 << 2) +#define ND_INTEL_SMART_NON_CRITICAL_HEALTH (1 << 0) +#define ND_INTEL_SMART_CRITICAL_HEALTH (1 << 1) +#define ND_INTEL_SMART_FATAL_HEALTH (1 << 2) +#define ND_INTEL_SMART_INJECT_MTEMP (1 << 0) +#define ND_INTEL_SMART_INJECT_SPARE (1 << 1) +#define ND_INTEL_SMART_INJECT_FATAL (1 << 2) +#define ND_INTEL_SMART_INJECT_SHUTDOWN (1 << 3) + +struct nd_intel_smart { + __u32 status; + union { + struct { + __u32 flags; + __u8 reserved0[4]; + __u8 health; + __u8 spares; + __u8 life_used; + __u8 alarm_flags; + __u16 media_temperature; + __u16 ctrl_temperature; + __u32 shutdown_count; + __u8 ait_status; + __u16 pmic_temperature; + __u8 reserved1[8]; + __u8 shutdown_state; + __u32 vendor_size; + __u8 vendor_data[92]; + } __packed; + __u8 data[128]; + }; +} __packed; + +struct nd_intel_smart_threshold { + __u32 status; + union { + struct { + __u16 alarm_control; + __u8 spares; + __u16 media_temperature; + __u16 ctrl_temperature; + __u8 reserved[1]; + } __packed; + __u8 data[8]; + }; +} __packed; + +struct nd_intel_smart_set_threshold { + __u16 alarm_control; + __u8 spares; + __u16 media_temperature; + __u16 ctrl_temperature; + __u32 status; +} __packed; + +struct nd_intel_smart_inject { + __u64 flags; + __u8 mtemp_enable; + __u16 media_temperature; + __u8 spare_enable; + __u8 spares; + __u8 fatal_enable; + __u8 unsafe_shutdown_enable; + __u32 status; +} __packed; + +#define INTEL_FW_STORAGE_SIZE 0x100000 +#define INTEL_FW_MAX_SEND_LEN 0xFFEC +#define INTEL_FW_QUERY_INTERVAL 250000 +#define INTEL_FW_QUERY_MAX_TIME 3000000 +#define INTEL_FW_FIS_VERSION 0x0105 +#define INTEL_FW_FAKE_VERSION 0xffffffffabcd + +enum intel_fw_update_state { + FW_STATE_NEW = 0, + FW_STATE_IN_PROGRESS, + FW_STATE_VERIFY, + FW_STATE_UPDATED, +}; + +struct nd_intel_fw_info { + __u32 status; + __u32 storage_size; + __u32 max_send_len; + __u32 query_interval; + __u32 max_query_time; + __u8 update_cap; + __u8 reserved[3]; + __u32 fis_version; + __u64 run_version; + __u64 updated_version; +} __packed; + +struct nd_intel_fw_start { + __u32 status; + __u32 context; +} __packed; + +/* this one has the output first because the variable input data size */ +struct nd_intel_fw_send_data { + __u32 context; + __u32 offset; + __u32 length; + __u8 data[0]; +/* this field is not declared due ot variable data from input */ +/* __u32 status; */ +} __packed; + +struct nd_intel_fw_finish_update { + __u8 ctrl_flags; + __u8 reserved[3]; + __u32 context; + __u32 status; +} __packed; + +struct nd_intel_fw_finish_query { + __u32 context; + __u32 status; + __u64 updated_fw_rev; +} __packed; + +struct nd_intel_lss { + __u8 enable; + __u32 status; +} __packed; + union acpi_object; typedef void *acpi_handle; diff --git a/tools/testing/nvdimm/watermark.h b/tools/testing/nvdimm/watermark.h new file mode 100644 index 000000000000..ed0528757bd4 --- /dev/null +++ b/tools/testing/nvdimm/watermark.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2018 Intel Corporation. All rights reserved. +#ifndef _TEST_NVDIMM_WATERMARK_H_ +#define _TEST_NVDIMM_WATERMARK_H_ +int pmem_test(void); +int libnvdimm_test(void); +int acpi_nfit_test(void); +int device_dax_test(void); + +/* + * dummy routine for nfit_test to validate it is linking to the properly + * mocked module and not the standard one from the base tree. + */ +#define nfit_test_watermark(x) \ +int x##_test(void) \ +{ \ + pr_debug("%s for nfit_test\n", KBUILD_MODNAME); \ + return 0; \ +} \ +EXPORT_SYMBOL(x##_test) +#endif /* _TEST_NVDIMM_WATERMARK_H_ */ diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 30cd0b296f1a..6c645eb77d42 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -153,11 +153,12 @@ void idr_nowait_test(void) idr_destroy(&idr); } -void idr_get_next_test(void) +void idr_get_next_test(int base) { unsigned long i; int nextid; DEFINE_IDR(idr); + idr_init_base(&idr, base); int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0}; @@ -177,6 +178,55 @@ void idr_get_next_test(void) idr_destroy(&idr); } +int idr_u32_cb(int id, void *ptr, void *data) +{ + BUG_ON(id < 0); + BUG_ON(ptr != DUMMY_PTR); + return 0; +} + +void idr_u32_test1(struct idr *idr, u32 handle) +{ + static bool warned = false; + u32 id = handle; + int sid = 0; + void *ptr; + + BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL)); + BUG_ON(id != handle); + BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL) != -ENOSPC); + BUG_ON(id != handle); + if (!warned && id > INT_MAX) + printk("vvv Ignore these warnings\n"); + ptr = idr_get_next(idr, &sid); + if (id > INT_MAX) { + BUG_ON(ptr != NULL); + BUG_ON(sid != 0); + } else { + BUG_ON(ptr != DUMMY_PTR); + BUG_ON(sid != id); + } + idr_for_each(idr, idr_u32_cb, NULL); + if (!warned && id > INT_MAX) { + printk("^^^ Warnings over\n"); + warned = true; + } + BUG_ON(idr_remove(idr, id) != DUMMY_PTR); + BUG_ON(!idr_is_empty(idr)); +} + +void idr_u32_test(int base) +{ + DEFINE_IDR(idr); + idr_init_base(&idr, base); + idr_u32_test1(&idr, 10); + idr_u32_test1(&idr, 0x7fffffff); + idr_u32_test1(&idr, 0x80000000); + idr_u32_test1(&idr, 0x80000001); + idr_u32_test1(&idr, 0xffe00000); + idr_u32_test1(&idr, 0xffffffff); +} + void idr_checks(void) { unsigned long i; @@ -207,6 +257,7 @@ void idr_checks(void) assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i); } assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i, GFP_KERNEL) == -ENOSPC); + assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i + 10, GFP_KERNEL) == -ENOSPC); idr_for_each(&idr, item_idr_free, &idr); idr_destroy(&idr); @@ -214,6 +265,23 @@ void idr_checks(void) assert(idr_is_empty(&idr)); + idr_set_cursor(&idr, INT_MAX - 3UL); + for (i = INT_MAX - 3UL; i < INT_MAX + 3UL; i++) { + struct item *item; + unsigned int id; + if (i <= INT_MAX) + item = item_create(i, 0); + else + item = item_create(i - INT_MAX - 1, 0); + + id = idr_alloc_cyclic(&idr, item, 0, 0, GFP_KERNEL); + assert(id == item->index); + } + + idr_for_each(&idr, item_idr_free, &idr); + idr_destroy(&idr); + assert(idr_is_empty(&idr)); + for (i = 1; i < 10000; i++) { struct item *item = item_create(i, 0); assert(idr_alloc(&idr, item, 1, 20000, GFP_KERNEL) == i); @@ -226,7 +294,12 @@ void idr_checks(void) idr_alloc_test(); idr_null_test(); idr_nowait_test(); - idr_get_next_test(); + idr_get_next_test(0); + idr_get_next_test(1); + idr_get_next_test(4); + idr_u32_test(4); + idr_u32_test(1); + idr_u32_test(0); } /* @@ -380,7 +453,7 @@ void ida_check_random(void) do { ida_pre_get(&ida, GFP_KERNEL); err = ida_get_new_above(&ida, bit, &id); - } while (err == -ENOMEM); + } while (err == -EAGAIN); assert(!err); assert(id == bit); } @@ -489,7 +562,7 @@ static void *ida_random_fn(void *arg) void ida_thread_tests(void) { - pthread_t threads[10]; + pthread_t threads[20]; int i; for (i = 0; i < ARRAY_SIZE(threads); i++) diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index 6903ccf35595..44a0d1ad4408 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -29,7 +29,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) { struct radix_tree_node *node; - if (flags & __GFP_NOWARN) + if (!(flags & __GFP_DIRECT_RECLAIM)) return NULL; pthread_mutex_lock(&cachep->lock); @@ -73,10 +73,17 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) void *kmalloc(size_t size, gfp_t gfp) { - void *ret = malloc(size); + void *ret; + + if (!(gfp & __GFP_DIRECT_RECLAIM)) + return NULL; + + ret = malloc(size); uatomic_inc(&nr_allocated); if (kmalloc_verbose) printf("Allocating %p from malloc\n", ret); + if (gfp & __GFP_ZERO) + memset(ret, 0, size); return ret; } diff --git a/tools/testing/radix-tree/linux/compiler_types.h b/tools/testing/radix-tree/linux/compiler_types.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/testing/radix-tree/linux/compiler_types.h diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h index e9fff59dfd8a..32159c08a52e 100644 --- a/tools/testing/radix-tree/linux/gfp.h +++ b/tools/testing/radix-tree/linux/gfp.h @@ -11,6 +11,7 @@ #define __GFP_IO 0x40u #define __GFP_FS 0x80u #define __GFP_NOWARN 0x200u +#define __GFP_ZERO 0x8000u #define __GFP_ATOMIC 0x80000u #define __GFP_ACCOUNT 0x100000u #define __GFP_DIRECT_RECLAIM 0x400000u @@ -18,6 +19,7 @@ #define __GFP_RECLAIM (__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM) +#define GFP_ZONEMASK 0x0fu #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index c3bc3f364f68..426f32f28547 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -17,6 +17,4 @@ #define pr_debug printk #define pr_cont printk -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - #endif /* _KERNEL_H */ diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h index 979baeec7e70..a037def0dec6 100644 --- a/tools/testing/radix-tree/linux/slab.h +++ b/tools/testing/radix-tree/linux/slab.h @@ -3,6 +3,7 @@ #define SLAB_H #include <linux/types.h> +#include <linux/gfp.h> #define SLAB_HWCACHE_ALIGN 1 #define SLAB_PANIC 2 @@ -11,6 +12,11 @@ void *kmalloc(size_t size, gfp_t); void kfree(void *); +static inline void *kzalloc(size_t size, gfp_t gfp) +{ + return kmalloc(size, gfp | __GFP_ZERO); +} + void *kmem_cache_alloc(struct kmem_cache *cachep, int flags); void kmem_cache_free(struct kmem_cache *cachep, void *objp); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index eaf599dc2137..32aafa92074c 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -7,6 +7,7 @@ TARGETS += cpufreq TARGETS += cpu-hotplug TARGETS += efivarfs TARGETS += exec +TARGETS += filesystems TARGETS += firmware TARGETS += ftrace TARGETS += futex @@ -14,6 +15,7 @@ TARGETS += gpio TARGETS += intel_pstate TARGETS += ipc TARGETS += kcmp +TARGETS += kvm TARGETS += lib TARGETS += membarrier TARGETS += memfd @@ -23,6 +25,7 @@ TARGETS += mqueue TARGETS += net TARGETS += nsfs TARGETS += powerpc +TARGETS += proc TARGETS += pstore TARGETS += ptrace TARGETS += seccomp @@ -66,6 +69,12 @@ ifndef BUILD BUILD := $(shell pwd) endif +# KSFT_TAP_LEVEL is used from KSFT framework to prevent nested TAP header +# printing from tests. Applicable to run_tests case where run_tests adds +# TAP header prior running tests and when a test program invokes another +# with system() call. Export it here to cover override RUN_TESTS defines. +export KSFT_TAP_LEVEL=`echo 1` + export BUILD all: @for TARGET in $(TARGETS); do \ @@ -116,13 +125,23 @@ ifdef INSTALL_PATH @# Ask all targets to emit their test scripts echo "#!/bin/sh" > $(ALL_SCRIPT) - echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT) + echo "BASE_DIR=\$$(realpath \$$(dirname \$$0))" >> $(ALL_SCRIPT) + echo "cd \$$BASE_DIR" >> $(ALL_SCRIPT) echo "ROOT=\$$PWD" >> $(ALL_SCRIPT) + echo "if [ \"\$$1\" = \"--summary\" ]; then" >> $(ALL_SCRIPT) + echo " OUTPUT=\$$BASE_DIR/output.log" >> $(ALL_SCRIPT) + echo " cat /dev/null > \$$OUTPUT" >> $(ALL_SCRIPT) + echo "else" >> $(ALL_SCRIPT) + echo " OUTPUT=/dev/stdout" >> $(ALL_SCRIPT) + echo "fi" >> $(ALL_SCRIPT) + echo "export KSFT_TAP_LEVEL=`echo 1`" >> $(ALL_SCRIPT) for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - echo "echo ; echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \ + echo "echo ; echo TAP version 13" >> $(ALL_SCRIPT); \ + echo "echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \ echo "echo ========================================" >> $(ALL_SCRIPT); \ + echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \ echo "cd $$TARGET" >> $(ALL_SCRIPT); \ make -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \ echo "cd \$$ROOT" >> $(ALL_SCRIPT); \ diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile index 1a7492268993..f6304d2be90c 100644 --- a/tools/testing/selftests/android/Makefile +++ b/tools/testing/selftests/android/Makefile @@ -11,11 +11,11 @@ all: BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ - #SUBDIR test prog name should be in the form: SUBDIR_test.sh + #SUBDIR test prog name should be in the form: SUBDIR_test.sh \ TEST=$$DIR"_test.sh"; \ - if [ -e $$DIR/$$TEST ]; then - rsync -a $$DIR/$$TEST $$BUILD_TARGET/; - fi + if [ -e $$DIR/$$TEST ]; then \ + rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \ + fi \ done override define RUN_TESTS diff --git a/tools/testing/selftests/android/ion/.gitignore b/tools/testing/selftests/android/ion/.gitignore index 67e6f391b2a9..95e8f4561474 100644 --- a/tools/testing/selftests/android/ion/.gitignore +++ b/tools/testing/selftests/android/ion/.gitignore @@ -1,2 +1,3 @@ ionapp_export ionapp_import +ionmap_test diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile index 96e0c448b39d..e03695287f76 100644 --- a/tools/testing/selftests/android/ion/Makefile +++ b/tools/testing/selftests/android/ion/Makefile @@ -1,8 +1,8 @@ -INCLUDEDIR := -I. -I../../../../../drivers/staging/android/uapi/ +INCLUDEDIR := -I. -I../../../../../drivers/staging/android/uapi/ -I../../../../../usr/include/ CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g -TEST_GEN_FILES := ionapp_export ionapp_import +TEST_GEN_FILES := ionapp_export ionapp_import ionmap_test all: $(TEST_GEN_FILES) @@ -14,3 +14,4 @@ include ../../lib.mk $(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c $(OUTPUT)/ionapp_import: ionapp_import.c ipcsocket.c ionutils.c +$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c diff --git a/tools/testing/selftests/android/ion/config b/tools/testing/selftests/android/ion/config index 19db6ca9aa2b..b4ad748a9dd9 100644 --- a/tools/testing/selftests/android/ion/config +++ b/tools/testing/selftests/android/ion/config @@ -2,3 +2,4 @@ CONFIG_ANDROID=y CONFIG_STAGING=y CONFIG_ION=y CONFIG_ION_SYSTEM_HEAP=y +CONFIG_DRM_VGEM=y diff --git a/tools/testing/selftests/android/ion/ionmap_test.c b/tools/testing/selftests/android/ion/ionmap_test.c new file mode 100644 index 000000000000..dab36b06b37d --- /dev/null +++ b/tools/testing/selftests/android/ion/ionmap_test.c @@ -0,0 +1,136 @@ +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> + +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include <linux/dma-buf.h> + +#include <drm/drm.h> + +#include "ion.h" +#include "ionutils.h" + +int check_vgem(int fd) +{ + drm_version_t version = { 0 }; + char name[5]; + int ret; + + version.name_len = 4; + version.name = name; + + ret = ioctl(fd, DRM_IOCTL_VERSION, &version); + if (ret) + return 1; + + return strcmp(name, "vgem"); +} + +int open_vgem(void) +{ + int i, fd; + const char *drmstr = "/dev/dri/card"; + + fd = -1; + for (i = 0; i < 16; i++) { + char name[80]; + + sprintf(name, "%s%u", drmstr, i); + + fd = open(name, O_RDWR); + if (fd < 0) + continue; + + if (check_vgem(fd)) { + close(fd); + continue; + } else { + break; + } + + } + return fd; +} + +int import_vgem_fd(int vgem_fd, int dma_buf_fd, uint32_t *handle) +{ + struct drm_prime_handle import_handle = { 0 }; + int ret; + + import_handle.fd = dma_buf_fd; + import_handle.flags = 0; + import_handle.handle = 0; + + ret = ioctl(vgem_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &import_handle); + if (ret == 0) + *handle = import_handle.handle; + return ret; +} + +void close_handle(int vgem_fd, uint32_t handle) +{ + struct drm_gem_close close = { 0 }; + + close.handle = handle; + ioctl(vgem_fd, DRM_IOCTL_GEM_CLOSE, &close); +} + +int main() +{ + int ret, vgem_fd; + struct ion_buffer_info info; + uint32_t handle = 0; + struct dma_buf_sync sync = { 0 }; + + info.heap_type = ION_HEAP_TYPE_SYSTEM; + info.heap_size = 4096; + info.flag_type = ION_FLAG_CACHED; + + ret = ion_export_buffer_fd(&info); + if (ret < 0) { + printf("ion buffer alloc failed\n"); + return -1; + } + + vgem_fd = open_vgem(); + if (vgem_fd < 0) { + ret = vgem_fd; + printf("Failed to open vgem\n"); + goto out_ion; + } + + ret = import_vgem_fd(vgem_fd, info.buffd, &handle); + + if (ret < 0) { + printf("Failed to import buffer\n"); + goto out_vgem; + } + + sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW; + ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync); + if (ret) + printf("sync start failed %d\n", errno); + + memset(info.buffer, 0xff, 4096); + + sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW; + ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync); + if (ret) + printf("sync end failed %d\n", errno); + + close_handle(vgem_fd, handle); + ret = 0; + +out_vgem: + close(vgem_fd); +out_ion: + ion_close_buffer_fd(&info); + printf("done.\n"); + return ret; +} diff --git a/tools/testing/selftests/android/ion/ionutils.c b/tools/testing/selftests/android/ion/ionutils.c index ce69c14f51fa..7d1d37c4ef6a 100644 --- a/tools/testing/selftests/android/ion/ionutils.c +++ b/tools/testing/selftests/android/ion/ionutils.c @@ -80,11 +80,6 @@ int ion_export_buffer_fd(struct ion_buffer_info *ion_info) heap_id = MAX_HEAP_COUNT + 1; for (i = 0; i < query.cnt; i++) { if (heap_data[i].type == ion_info->heap_type) { - printf("--------------------------------------\n"); - printf("heap type: %d\n", heap_data[i].type); - printf(" heap id: %d\n", heap_data[i].heap_id); - printf("heap name: %s\n", heap_data[i].name); - printf("--------------------------------------\n"); heap_id = heap_data[i].heap_id; break; } @@ -204,7 +199,6 @@ void ion_close_buffer_fd(struct ion_buffer_info *ion_info) /* Finally, close the client fd */ if (ion_info->ionfd > 0) close(ion_info->ionfd); - printf("<%s>: buffer release successfully....\n", __func__); } } diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 541d9d7fad5a..9cf83f895d98 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -3,3 +3,12 @@ test_maps test_lru_map test_lpm_map test_tag +FEATURE-DUMP.libbpf +fixdep +test_align +test_dev_cgroup +test_progs +test_tcpbpf_user +test_verifier_log +feature +test_libbpf_open diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 9316e648a880..0a315ddabbf4 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -11,23 +11,51 @@ ifneq ($(wildcard $(GENHDR)),) endif CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include -LDLIBS += -lcap -lelf -lrt +LDLIBS += -lcap -lelf -lrt -lpthread +TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read +all: $(TEST_CUSTOM_PROGS) + +$(TEST_CUSTOM_PROGS): urandom_read + +urandom_read: urandom_read.c + $(CC) -o $(TEST_CUSTOM_PROGS) -static $< + +# Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ - test_align test_verifier_log test_dev_cgroup + test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ + test_sock test_sock_addr TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ - sockmap_verdict_prog.o dev_cgroup.o - -TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh + sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ + test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ + sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ + sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o + +# Order correspond to 'make run_tests' order +TEST_PROGS := test_kmod.sh \ + test_libbpf.sh \ + test_xdp_redirect.sh \ + test_xdp_meta.sh \ + test_offload.py \ + test_sock_addr.sh + +# Compile but not part of 'make run_tests' +TEST_GEN_PROGS_EXTENDED = test_libbpf_open include ../lib.mk -BPFOBJ := $(OUTPUT)/libbpf.a $(OUTPUT)/cgroup_helpers.c +BPFOBJ := $(OUTPUT)/libbpf.a $(TEST_GEN_PROGS): $(BPFOBJ) +$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a + +$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c +$(OUTPUT)/test_sock: cgroup_helpers.c +$(OUTPUT)/test_sock_addr: cgroup_helpers.c + .PHONY: force # force a rebuild of BPFOBJ when its dependencies are updated @@ -49,8 +77,15 @@ else CPU ?= generic endif -%.o: %.c - $(CLANG) -I. -I./include/uapi -I../../../include/uapi \ - -Wno-compare-distinct-pointer-types \ +CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ + -Wno-compare-distinct-pointer-types + +$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline +$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline + +$(OUTPUT)/%.o: %.c + $(CLANG) $(CLANG_FLAGS) \ -O2 -target bpf -emit-llvm -c $< -o - | \ $(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@ + +EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index fd9a17fa8a8b..d8223d99f96d 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -71,6 +71,8 @@ static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_getsockopt; +static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) = + (void *) BPF_FUNC_sock_ops_cb_flags_set; static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) = (void *) BPF_FUNC_sk_redirect_map; static int (*bpf_sock_map_update)(void *map, void *key, void *value, @@ -82,7 +84,18 @@ static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, unsigned int buf_size) = (void *) BPF_FUNC_perf_prog_read_value; - +static int (*bpf_override_return)(void *ctx, unsigned long rc) = + (void *) BPF_FUNC_override_return; +static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) = + (void *) BPF_FUNC_msg_redirect_map; +static int (*bpf_msg_apply_bytes)(void *ctx, int len) = + (void *) BPF_FUNC_msg_apply_bytes; +static int (*bpf_msg_cork_bytes)(void *ctx, int len) = + (void *) BPF_FUNC_msg_cork_bytes; +static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = + (void *) BPF_FUNC_msg_pull_data; +static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = + (void *) BPF_FUNC_bind; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions @@ -120,6 +133,8 @@ static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = (void *) BPF_FUNC_skb_under_cgroup; static int (*bpf_skb_change_head)(void *, int len, int flags) = (void *) BPF_FUNC_skb_change_head; +static int (*bpf_skb_pull_data)(void *, int len) = + (void *) BPF_FUNC_skb_pull_data; /* Scan the ARCH passed in from ARCH env variable (see Makefile) */ #if defined(__TARGET_ARCH_x86) diff --git a/tools/testing/selftests/bpf/bpf_rlimit.h b/tools/testing/selftests/bpf/bpf_rlimit.h new file mode 100644 index 000000000000..9dac9b30f8ef --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_rlimit.h @@ -0,0 +1,28 @@ +#include <sys/resource.h> +#include <stdio.h> + +static __attribute__((constructor)) void bpf_rlimit_ctor(void) +{ + struct rlimit rlim_old, rlim_new = { + .rlim_cur = RLIM_INFINITY, + .rlim_max = RLIM_INFINITY, + }; + + getrlimit(RLIMIT_MEMLOCK, &rlim_old); + /* For the sake of running the test cases, we temporarily + * set rlimit to infinity in order for kernel to focus on + * errors from actual test cases and not getting noise + * from hitting memlock limits. The limit is on per-process + * basis and not a global one, hence destructor not really + * needed here. + */ + if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) { + perror("Unable to lift memlock rlimit"); + /* Trying out lower limit, but expect potential test + * case failures from this! + */ + rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20); + rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20); + setrlimit(RLIMIT_MEMLOCK, &rlim_new); + } +} diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 52d53ed08769..983dd25d49f4 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -3,3 +3,5 @@ CONFIG_BPF_SYSCALL=y CONFIG_NET_CLS_BPF=m CONFIG_BPF_EVENTS=y CONFIG_TEST_BPF=m +CONFIG_CGROUP_BPF=y +CONFIG_NETDEVSIM=m diff --git a/tools/testing/selftests/bpf/connect4_prog.c b/tools/testing/selftests/bpf/connect4_prog.c new file mode 100644 index 000000000000..5a88a681d2ab --- /dev/null +++ b/tools/testing/selftests/bpf/connect4_prog.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <string.h> + +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <sys/socket.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define SRC_REWRITE_IP4 0x7f000004U +#define DST_REWRITE_IP4 0x7f000001U +#define DST_REWRITE_PORT4 4444 + +int _version SEC("version") = 1; + +SEC("cgroup/connect4") +int connect_v4_prog(struct bpf_sock_addr *ctx) +{ + struct sockaddr_in sa; + + /* Rewrite destination. */ + ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4); + ctx->user_port = bpf_htons(DST_REWRITE_PORT4); + + if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) { + ///* Rewrite source. */ + memset(&sa, 0, sizeof(sa)); + + sa.sin_family = AF_INET; + sa.sin_port = bpf_htons(0); + sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4); + + if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) + return 0; + } + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/connect6_prog.c b/tools/testing/selftests/bpf/connect6_prog.c new file mode 100644 index 000000000000..8ea3f7d12dee --- /dev/null +++ b/tools/testing/selftests/bpf/connect6_prog.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <string.h> + +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <sys/socket.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define SRC_REWRITE_IP6_0 0 +#define SRC_REWRITE_IP6_1 0 +#define SRC_REWRITE_IP6_2 0 +#define SRC_REWRITE_IP6_3 6 + +#define DST_REWRITE_IP6_0 0 +#define DST_REWRITE_IP6_1 0 +#define DST_REWRITE_IP6_2 0 +#define DST_REWRITE_IP6_3 1 + +#define DST_REWRITE_PORT6 6666 + +int _version SEC("version") = 1; + +SEC("cgroup/connect6") +int connect_v6_prog(struct bpf_sock_addr *ctx) +{ + struct sockaddr_in6 sa; + + /* Rewrite destination. */ + ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0); + ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1); + ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2); + ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3); + + ctx->user_port = bpf_htons(DST_REWRITE_PORT6); + + if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) { + /* Rewrite source. */ + memset(&sa, 0, sizeof(sa)); + + sa.sin6_family = AF_INET6; + sa.sin6_port = bpf_htons(0); + + sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0); + sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1); + sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2); + sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3); + + if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) + return 0; + } + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/sample_map_ret0.c b/tools/testing/selftests/bpf/sample_map_ret0.c new file mode 100644 index 000000000000..0756303676ac --- /dev/null +++ b/tools/testing/selftests/bpf/sample_map_ret0.c @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ +#include <linux/bpf.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") htab = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(long), + .max_entries = 2, +}; + +struct bpf_map_def SEC("maps") array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(long), + .max_entries = 2, +}; + +/* Sample program which should always load for testing control paths. */ +SEC(".text") int func() +{ + __u64 key64 = 0; + __u32 key = 0; + long *value; + + value = bpf_map_lookup_elem(&htab, &key); + if (!value) + return 1; + value = bpf_map_lookup_elem(&array, &key64); + if (!value) + return 1; + + return 0; +} diff --git a/tools/testing/selftests/bpf/sample_ret0.c b/tools/testing/selftests/bpf/sample_ret0.c new file mode 100644 index 000000000000..fec99750d6ea --- /dev/null +++ b/tools/testing/selftests/bpf/sample_ret0.c @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ + +/* Sample program which should always load for testing control paths. */ +int func() +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/sockmap_parse_prog.c b/tools/testing/selftests/bpf/sockmap_parse_prog.c index a1dec2b6d9c5..0f92858f6226 100644 --- a/tools/testing/selftests/bpf/sockmap_parse_prog.c +++ b/tools/testing/selftests/bpf/sockmap_parse_prog.c @@ -20,14 +20,25 @@ int bpf_prog1(struct __sk_buff *skb) __u32 lport = skb->local_port; __u32 rport = skb->remote_port; __u8 *d = data; + __u32 len = (__u32) data_end - (__u32) data; + int err; - if (data + 10 > data_end) - return skb->len; + if (data + 10 > data_end) { + err = bpf_skb_pull_data(skb, 10); + if (err) + return SK_DROP; + + data_end = (void *)(long)skb->data_end; + data = (void *)(long)skb->data; + if (data + 10 > data_end) + return SK_DROP; + } /* This write/read is a bit pointless but tests the verifier and * strparser handler for read/write pkt data and access into sk * fields. */ + d = data; d[7] = 1; return skb->len; } diff --git a/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c new file mode 100644 index 000000000000..12a7b5c82ed6 --- /dev/null +++ b/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c @@ -0,0 +1,33 @@ +#include <linux/bpf.h> +#include "bpf_helpers.h" +#include "bpf_util.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sk_msg1") +int bpf_prog1(struct sk_msg_md *msg) +{ + void *data_end = (void *)(long) msg->data_end; + void *data = (void *)(long) msg->data; + + char *d; + + if (data + 8 > data_end) + return SK_DROP; + + bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data); + d = (char *)data; + bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]); + + return SK_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c index d7bea972cb21..2ce7634a4012 100644 --- a/tools/testing/selftests/bpf/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c @@ -26,6 +26,13 @@ struct bpf_map_def SEC("maps") sock_map_tx = { .max_entries = 20, }; +struct bpf_map_def SEC("maps") sock_map_msg = { + .type = BPF_MAP_TYPE_SOCKMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 20, +}; + struct bpf_map_def SEC("maps") sock_map_break = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py new file mode 100755 index 000000000000..481dccdf140c --- /dev/null +++ b/tools/testing/selftests/bpf/tcp_client.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python2 +# +# SPDX-License-Identifier: GPL-2.0 +# + +import sys, os, os.path, getopt +import socket, time +import subprocess +import select + +def read(sock, n): + buf = '' + while len(buf) < n: + rem = n - len(buf) + try: s = sock.recv(rem) + except (socket.error), e: return '' + buf += s + return buf + +def send(sock, s): + total = len(s) + count = 0 + while count < total: + try: n = sock.send(s) + except (socket.error), e: n = 0 + if n == 0: + return count; + count += n + return count + + +serverPort = int(sys.argv[1]) +HostName = socket.gethostname() + +# create active socket +sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) +try: + sock.connect((HostName, serverPort)) +except socket.error as e: + sys.exit(1) + +buf = '' +n = 0 +while n < 1000: + buf += '+' + n += 1 + +sock.settimeout(1); +n = send(sock, buf) +n = read(sock, 500) +sys.exit(0) diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py new file mode 100755 index 000000000000..bc454d7d0be2 --- /dev/null +++ b/tools/testing/selftests/bpf/tcp_server.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python2 +# +# SPDX-License-Identifier: GPL-2.0 +# + +import sys, os, os.path, getopt +import socket, time +import subprocess +import select + +def read(sock, n): + buf = '' + while len(buf) < n: + rem = n - len(buf) + try: s = sock.recv(rem) + except (socket.error), e: return '' + buf += s + return buf + +def send(sock, s): + total = len(s) + count = 0 + while count < total: + try: n = sock.send(s) + except (socket.error), e: n = 0 + if n == 0: + return count; + count += n + return count + + +SERVER_PORT = 12877 +MAX_PORTS = 2 + +serverPort = SERVER_PORT +serverSocket = None + +HostName = socket.gethostname() + +# create passive socket +serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) +host = socket.gethostname() + +try: serverSocket.bind((host, 0)) +except socket.error as msg: + print 'bind fails: ', msg + +sn = serverSocket.getsockname() +serverPort = sn[1] + +cmdStr = ("./tcp_client.py %d &") % (serverPort) +os.system(cmdStr) + +buf = '' +n = 0 +while n < 500: + buf += '.' + n += 1 + +serverSocket.listen(MAX_PORTS) +readList = [serverSocket] + +while True: + readyRead, readyWrite, inError = \ + select.select(readList, [], [], 2) + + if len(readyRead) > 0: + waitCount = 0 + for sock in readyRead: + if sock == serverSocket: + (clientSocket, address) = serverSocket.accept() + address = str(address[0]) + readList.append(clientSocket) + else: + sock.settimeout(1); + s = read(sock, 1000) + n = send(sock, buf) + sock.close() + serverSocket.close() + sys.exit(0) + else: + print 'Select timeout!' + sys.exit(1) diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c index 471bbbdb94db..6b1b302310fe 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@ -9,8 +9,6 @@ #include <stddef.h> #include <stdbool.h> -#include <sys/resource.h> - #include <linux/unistd.h> #include <linux/filter.h> #include <linux/bpf_perf_event.h> @@ -19,6 +17,7 @@ #include <bpf/bpf.h> #include "../../../include/linux/filter.h" +#include "bpf_rlimit.h" #ifndef ARRAY_SIZE # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) @@ -64,11 +63,11 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv2"}, - {2, "R3=inv4"}, - {3, "R3=inv8"}, - {4, "R3=inv16"}, - {5, "R3=inv32"}, + {1, "R3_w=inv2"}, + {2, "R3_w=inv4"}, + {3, "R3_w=inv8"}, + {4, "R3_w=inv16"}, + {5, "R3_w=inv32"}, }, }, { @@ -92,17 +91,17 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv1"}, - {2, "R3=inv2"}, - {3, "R3=inv4"}, - {4, "R3=inv8"}, - {5, "R3=inv16"}, - {6, "R3=inv1"}, - {7, "R4=inv32"}, - {8, "R4=inv16"}, - {9, "R4=inv8"}, - {10, "R4=inv4"}, - {11, "R4=inv2"}, + {1, "R3_w=inv1"}, + {2, "R3_w=inv2"}, + {3, "R3_w=inv4"}, + {4, "R3_w=inv8"}, + {5, "R3_w=inv16"}, + {6, "R3_w=inv1"}, + {7, "R4_w=inv32"}, + {8, "R4_w=inv16"}, + {9, "R4_w=inv8"}, + {10, "R4_w=inv4"}, + {11, "R4_w=inv2"}, }, }, { @@ -121,12 +120,12 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv4"}, - {2, "R3=inv8"}, - {3, "R3=inv10"}, - {4, "R4=inv8"}, - {5, "R4=inv12"}, - {6, "R4=inv14"}, + {1, "R3_w=inv4"}, + {2, "R3_w=inv8"}, + {3, "R3_w=inv10"}, + {4, "R4_w=inv8"}, + {5, "R4_w=inv12"}, + {6, "R4_w=inv14"}, }, }, { @@ -143,10 +142,10 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv7"}, - {2, "R3=inv7"}, - {3, "R3=inv14"}, - {4, "R3=inv56"}, + {1, "R3_w=inv7"}, + {2, "R3_w=inv7"}, + {3, "R3_w=inv14"}, + {4, "R3_w=inv56"}, }, }, @@ -185,18 +184,18 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { {7, "R0=pkt(id=0,off=8,r=8,imm=0)"}, - {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R3=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {9, "R3=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {10, "R3=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {11, "R3=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, {18, "R3=pkt_end(id=0,off=0,imm=0)"}, - {18, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {19, "R4=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, - {20, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, - {21, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {22, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {23, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, + {20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, }, }, { @@ -217,16 +216,16 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {9, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {11, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {12, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {13, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {14, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {15, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {16, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, }, }, { @@ -257,14 +256,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {4, "R5=pkt(id=0,off=0,r=0,imm=0)"}, - {5, "R5=pkt(id=0,off=14,r=0,imm=0)"}, - {6, "R4=pkt(id=0,off=14,r=0,imm=0)"}, + {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, + {5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"}, + {6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"}, {10, "R2=pkt(id=0,off=0,r=18,imm=0)"}, {10, "R5=pkt(id=0,off=14,r=18,imm=0)"}, - {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {14, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, - {15, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, + {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, + {15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, }, }, { @@ -320,11 +319,11 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. */ - {11, "R5=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {11, "R5_w=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * it's total offset is NET_IP_ALIGN + reg->off (0) + * reg->aux_off (14) which is 16. Then the variable @@ -336,11 +335,11 @@ static struct bpf_align_test tests[] = { /* Variable offset is added to R5 packet pointer, * resulting in auxiliary alignment of 4. */ - {18, "R5=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - {19, "R5=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off * (14) which is 16. Then the variable offset is 4-byte @@ -352,18 +351,18 @@ static struct bpf_align_test tests[] = { /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5=pkt(id=0,off=14,r=8"}, + {26, "R5_w=pkt(id=0,off=14,r=8"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). */ - {27, "R5=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant is added to R5 again, setting reg->off to 18. */ - {28, "R5=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* And once more we add a variable; resulting var_off * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {29, "R5=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"}, + {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so @@ -410,11 +409,11 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Adding 14 makes R6 be (4n+2) */ - {9, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* Packet pointer has (4n+2) offset */ - {11, "R5=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) @@ -426,11 +425,11 @@ static struct bpf_align_test tests[] = { /* Newly read value in R6 was shifted left by 2, so has * known alignment of 4. */ - {18, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Added (4n) to packet pointer's (4n+2) var_off, giving * another (4n+2). */ - {19, "R5=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, + {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) @@ -446,11 +445,9 @@ static struct bpf_align_test tests[] = { .insns = { PREP_PKT_POINTERS, BPF_MOV64_IMM(BPF_REG_0, 0), - /* ptr & const => unknown & const */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), - BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 0x40), - /* ptr << const => unknown << const */ - BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + /* (ptr - ptr) << 2 */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_2), BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2), /* We have a (4n) value. Let's make a packet offset * out of it. First add 14, to make it a (4n+2) @@ -473,8 +470,26 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {4, "R5=pkt(id=0,off=0,r=0,imm=0)"}, - /* R5 bitwise operator &= on pointer prohibited */ + {4, "R5_w=pkt_end(id=0,off=0,imm=0)"}, + /* (ptr - ptr) << 2 == unknown, (4n) */ + {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"}, + /* (4n) + 14 == (4n+2). We blow our bounds, because + * the add could overflow. + */ + {7, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"}, + /* Checked s>=0 */ + {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + /* packet pointer + nonnegative (4n+2) */ + {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + {13, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. + * We checked the bounds, but it might have been able + * to overflow if the packet pointer started in the + * upper half of the address space. + * So we did not get a 'range' on R6, and the access + * attempt will fail. + */ + {15, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, } }, { @@ -510,11 +525,11 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {9, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Adding 14 makes R6 be (4n+2) */ - {10, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* New unknown value in R7 is (4n) */ - {11, "R7=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Subtracting it from R6 blows our unsigned bounds */ {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"}, /* Checked s>= 0 */ @@ -563,15 +578,15 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {10, "R6=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, + {10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, /* Adding 14 makes R6 be (4n+2) */ - {11, "R6=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, + {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, /* Subtracting from packet pointer overflows ubounds */ - {13, "R5=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"}, + {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"}, /* New unknown value in R7 is (4n), >= 76 */ - {15, "R7=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, + {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, + {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so @@ -686,9 +701,6 @@ static int do_test(unsigned int from, unsigned int to) int main(int argc, char **argv) { unsigned int from = 0, to = ARRAY_SIZE(tests); - struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; - - setrlimit(RLIMIT_MEMLOCK, &rinf); if (argc == 3) { unsigned int l = atoi(argv[argc - 2]); diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c index 02c85d6c89b0..9c8b50bac7e0 100644 --- a/tools/testing/selftests/bpf/test_dev_cgroup.c +++ b/tools/testing/selftests/bpf/test_dev_cgroup.c @@ -10,16 +10,18 @@ #include <string.h> #include <errno.h> #include <assert.h> +#include <sys/time.h> #include <linux/bpf.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> #include "cgroup_helpers.h" +#include "bpf_rlimit.h" #define DEV_CGROUP_PROG "./dev_cgroup.o" -#define TEST_CGROUP "test-bpf-based-device-cgroup/" +#define TEST_CGROUP "/test-bpf-based-device-cgroup/" int main(int argc, char **argv) { @@ -31,7 +33,7 @@ int main(int argc, char **argv) if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE, &obj, &prog_fd)) { printf("Failed to load DEV_CGROUP program\n"); - goto err; + goto out; } if (setup_cgroup_environment()) { @@ -89,5 +91,6 @@ int main(int argc, char **argv) err: cleanup_cgroup_environment(); +out: return error; } diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh index ed4774d8d6ed..35669ccd4d23 100755 --- a/tools/testing/selftests/bpf/test_kmod.sh +++ b/tools/testing/selftests/bpf/test_kmod.sh @@ -10,9 +10,21 @@ test_run() echo "[ JIT enabled:$1 hardened:$2 ]" dmesg -C - insmod $SRC_TREE/lib/test_bpf.ko 2> /dev/null - if [ $? -ne 0 ]; then - rc=1 + if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then + insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null + if [ $? -ne 0 ]; then + rc=1 + fi + else + # Use modprobe dry run to check for missing test_bpf module + if ! /sbin/modprobe -q -n test_bpf; then + echo "test_bpf: [SKIP]" + elif /sbin/modprobe -q test_bpf; then + echo "test_bpf: ok" + else + echo "test_bpf: [FAIL]" + rc=1 + fi fi rmmod test_bpf 2> /dev/null dmesg | grep FAIL diff --git a/tools/testing/selftests/bpf/test_l4lb_noinline.c b/tools/testing/selftests/bpf/test_l4lb_noinline.c new file mode 100644 index 000000000000..ba44a14e6dc4 --- /dev/null +++ b/tools/testing/selftests/bpf/test_l4lb_noinline.c @@ -0,0 +1,473 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include "bpf_helpers.h" +#include "test_iptunnel_common.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; + +static __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +#define PCKT_FRAGMENTED 65343 +#define IPV4_HDR_LEN_NO_OPT 20 +#define IPV4_PLUS_ICMP_HDR 28 +#define IPV6_PLUS_ICMP_HDR 48 +#define RING_SIZE 2 +#define MAX_VIPS 12 +#define MAX_REALS 5 +#define CTL_MAP_SIZE 16 +#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE) +#define F_IPV6 (1 << 0) +#define F_HASH_NO_SRC_PORT (1 << 0) +#define F_ICMP (1 << 0) +#define F_SYN_SET (1 << 1) + +struct packet_description { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct vip_stats { + __u64 bytes; + __u64 pkts; +}; + +struct eth_hdr { + unsigned char eth_dest[ETH_ALEN]; + unsigned char eth_source[ETH_ALEN]; + unsigned short eth_proto; +}; + +struct bpf_map_def SEC("maps") vip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip), + .value_size = sizeof(struct vip_meta), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ch_rings = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = CH_RINGS_SIZE, +}; + +struct bpf_map_def SEC("maps") reals = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct real_definition), + .max_entries = MAX_REALS, +}; + +struct bpf_map_def SEC("maps") stats = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct vip_stats), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ctl_array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct ctl_value), + .max_entries = CTL_MAP_SIZE, +}; + +static __u32 get_packet_hash(struct packet_description *pckt, + bool ipv6) +{ + if (ipv6) + return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS), + pckt->ports, CH_RINGS_SIZE); + else + return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE); +} + +static bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6) +{ + __u32 hash = get_packet_hash(pckt, is_ipv6); + __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE; + __u32 *real_pos; + + if (hash != 0x358459b7 /* jhash of ipv4 packet */ && + hash != 0x2f4bc6bb /* jhash of ipv6 packet */) + return 0; + + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return false; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return false; + return true; +} + +static int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG) + return TC_ACT_OK; + off += sizeof(struct icmp6hdr); + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + pckt->proto = ip6h->nexthdr; + pckt->flags |= F_ICMP; + memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16); + memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16); + return TC_ACT_UNSPEC; +} + +static int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->type != ICMP_DEST_UNREACH || + icmp_hdr->code != ICMP_FRAG_NEEDED) + return TC_ACT_OK; + off += sizeof(struct icmphdr); + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + pckt->proto = iph->protocol; + pckt->flags |= F_ICMP; + pckt->src = iph->daddr; + pckt->dst = iph->saddr; + return TC_ACT_UNSPEC; +} + +static bool parse_udp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct udphdr *udp; + udp = data + off; + + if (udp + 1 > data_end) + return false; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = udp->source; + pckt->port16[1] = udp->dest; + } else { + pckt->port16[0] = udp->dest; + pckt->port16[1] = udp->source; + } + return true; +} + +static bool parse_tcp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct tcphdr *tcp; + + tcp = data + off; + if (tcp + 1 > data_end) + return false; + + if (tcp->syn) + pckt->flags |= F_SYN_SET; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = tcp->source; + pckt->port16[1] = tcp->dest; + } else { + pckt->port16[0] = tcp->dest; + pckt->port16[1] = tcp->source; + } + return true; +} + +static int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct __sk_buff *skb) +{ + void *pkt_start = (void *)(long)skb->data; + struct packet_description pckt = {}; + struct eth_hdr *eth = pkt_start; + struct bpf_tunnel_key tkey = {}; + struct vip_stats *data_stats; + struct real_definition *dst; + struct vip_meta *vip_info; + struct ctl_value *cval; + __u32 v4_intf_pos = 1; + __u32 v6_intf_pos = 2; + struct ipv6hdr *ip6h; + struct vip vip = {}; + struct iphdr *iph; + int tun_flag = 0; + __u16 pkt_bytes; + __u64 iph_len; + __u32 ifindex; + __u8 protocol; + __u32 vip_num; + int action; + + tkey.tunnel_ttl = 64; + if (is_ipv6) { + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + + iph_len = sizeof(struct ipv6hdr); + protocol = ip6h->nexthdr; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(ip6h->payload_len); + off += iph_len; + if (protocol == IPPROTO_FRAGMENT) { + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_ICMPV6) { + action = parse_icmpv6(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV6_PLUS_ICMP_HDR; + } else { + memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16); + memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16); + } + } else { + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + + protocol = iph->protocol; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(iph->tot_len); + off += IPV4_HDR_LEN_NO_OPT; + + if (iph->frag_off & PCKT_FRAGMENTED) + return TC_ACT_SHOT; + if (protocol == IPPROTO_ICMP) { + action = parse_icmp(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV4_PLUS_ICMP_HDR; + } else { + pckt.src = iph->saddr; + pckt.dst = iph->daddr; + } + } + protocol = pckt.proto; + + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else { + return TC_ACT_SHOT; + } + + if (is_ipv6) + memcpy(vip.daddr.v6, pckt.dstv6, 16); + else + vip.daddr.v4 = pckt.dst; + + vip.dport = pckt.port16[1]; + vip.protocol = pckt.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.dport = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return TC_ACT_SHOT; + pckt.port16[1] = 0; + } + + if (vip_info->flags & F_HASH_NO_SRC_PORT) + pckt.port16[0] = 0; + + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6)) + return TC_ACT_SHOT; + + if (dst->flags & F_IPV6) { + cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + memcpy(tkey.remote_ipv6, dst->dstv6, 16); + tun_flag = BPF_F_TUNINFO_IPV6; + } else { + cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + tkey.remote_ipv4 = dst->dst; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return TC_ACT_SHOT; + data_stats->pkts++; + data_stats->bytes += pkt_bytes; + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag); + *(u32 *)eth->eth_dest = tkey.remote_ipv4; + return bpf_redirect(ifindex, 0); +} + +SEC("l4lb-demo") +int balancer_ingress(struct __sk_buff *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return TC_ACT_SHOT; + eth_proto = eth->eth_proto; + if (eth_proto == bpf_htons(ETH_P_IP)) + return process_packet(data, nh_off, data_end, false, ctx); + else if (eth_proto == bpf_htons(ETH_P_IPV6)) + return process_packet(data, nh_off, data_end, true, ctx); + else + return TC_ACT_SHOT; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh new file mode 100755 index 000000000000..d97dc914cd49 --- /dev/null +++ b/tools/testing/selftests/bpf/test_libbpf.sh @@ -0,0 +1,49 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +export TESTNAME=test_libbpf + +# Determine selftest success via shell exit code +exit_handler() +{ + if (( $? == 0 )); then + echo "selftests: $TESTNAME [PASS]"; + else + echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2 + echo "selftests: $TESTNAME [FAILED]"; + fi +} + +libbpf_open_file() +{ + LAST_LOADED=$1 + if [ -n "$VERBOSE" ]; then + ./test_libbpf_open $1 + else + ./test_libbpf_open --quiet $1 + fi +} + +# Exit script immediately (well catched by trap handler) if any +# program/thing exits with a non-zero status. +set -e + +# (Use 'trap -l' to list meaning of numbers) +trap exit_handler 0 2 3 6 9 + +libbpf_open_file test_l4lb.o + +# TODO: fix libbpf to load noinline functions +# [warning] libbpf: incorrect bpf_call opcode +#libbpf_open_file test_l4lb_noinline.o + +# TODO: fix test_xdp_meta.c to load with libbpf +# [warning] libbpf: test_xdp_meta.o doesn't provide kernel version +#libbpf_open_file test_xdp_meta.o + +# TODO: fix libbpf to handle .eh_frame +# [warning] libbpf: relocation failed: no section(10) +#libbpf_open_file ../../../../samples/bpf/tracex3_kern.o + +# Success +exit 0 diff --git a/tools/testing/selftests/bpf/test_libbpf_open.c b/tools/testing/selftests/bpf/test_libbpf_open.c new file mode 100644 index 000000000000..8fcd1c076add --- /dev/null +++ b/tools/testing/selftests/bpf/test_libbpf_open.c @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc. + */ +static const char *__doc__ = + "Libbpf test program for loading BPF ELF object files"; + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#include <bpf/libbpf.h> +#include <getopt.h> + +static const struct option long_options[] = { + {"help", no_argument, NULL, 'h' }, + {"debug", no_argument, NULL, 'D' }, + {"quiet", no_argument, NULL, 'q' }, + {0, 0, NULL, 0 } +}; + +static void usage(char *argv[]) +{ + int i; + + printf("\nDOCUMENTATION:\n%s\n\n", __doc__); + printf(" Usage: %s (options-see-below) BPF_FILE\n", argv[0]); + printf(" Listing options:\n"); + for (i = 0; long_options[i].name != 0; i++) { + printf(" --%-12s", long_options[i].name); + printf(" short-option: -%c", + long_options[i].val); + printf("\n"); + } + printf("\n"); +} + +#define DEFINE_PRINT_FN(name, enabled) \ +static int libbpf_##name(const char *fmt, ...) \ +{ \ + va_list args; \ + int ret; \ + \ + va_start(args, fmt); \ + if (enabled) { \ + fprintf(stderr, "[" #name "] "); \ + ret = vfprintf(stderr, fmt, args); \ + } \ + va_end(args); \ + return ret; \ +} +DEFINE_PRINT_FN(warning, 1) +DEFINE_PRINT_FN(info, 1) +DEFINE_PRINT_FN(debug, 1) + +#define EXIT_FAIL_LIBBPF EXIT_FAILURE +#define EXIT_FAIL_OPTION 2 + +int test_walk_progs(struct bpf_object *obj, bool verbose) +{ + struct bpf_program *prog; + int cnt = 0; + + bpf_object__for_each_program(prog, obj) { + cnt++; + if (verbose) + printf("Prog (count:%d) section_name: %s\n", cnt, + bpf_program__title(prog, false)); + } + return 0; +} + +int test_walk_maps(struct bpf_object *obj, bool verbose) +{ + struct bpf_map *map; + int cnt = 0; + + bpf_map__for_each(map, obj) { + cnt++; + if (verbose) + printf("Map (count:%d) name: %s\n", cnt, + bpf_map__name(map)); + } + return 0; +} + +int test_open_file(char *filename, bool verbose) +{ + struct bpf_object *bpfobj = NULL; + long err; + + if (verbose) + printf("Open BPF ELF-file with libbpf: %s\n", filename); + + /* Load BPF ELF object file and check for errors */ + bpfobj = bpf_object__open(filename); + err = libbpf_get_error(bpfobj); + if (err) { + char err_buf[128]; + libbpf_strerror(err, err_buf, sizeof(err_buf)); + if (verbose) + printf("Unable to load eBPF objects in file '%s': %s\n", + filename, err_buf); + return EXIT_FAIL_LIBBPF; + } + test_walk_progs(bpfobj, verbose); + test_walk_maps(bpfobj, verbose); + + if (verbose) + printf("Close BPF ELF-file with libbpf: %s\n", + bpf_object__name(bpfobj)); + bpf_object__close(bpfobj); + + return 0; +} + +int main(int argc, char **argv) +{ + char filename[1024] = { 0 }; + bool verbose = 1; + int longindex = 0; + int opt; + + libbpf_set_print(libbpf_warning, libbpf_info, NULL); + + /* Parse commands line args */ + while ((opt = getopt_long(argc, argv, "hDq", + long_options, &longindex)) != -1) { + switch (opt) { + case 'D': + libbpf_set_print(libbpf_warning, libbpf_info, + libbpf_debug); + break; + case 'q': /* Use in scripting mode */ + verbose = 0; + break; + case 'h': + default: + usage(argv); + return EXIT_FAIL_OPTION; + } + } + if (optind >= argc) { + usage(argv); + printf("ERROR: Expected BPF_FILE argument after options\n"); + return EXIT_FAIL_OPTION; + } + snprintf(filename, sizeof(filename), "%s", argv[optind]); + + return test_open_file(filename, verbose); +} diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index f61480641b6e..147e34cfceb7 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -14,6 +14,7 @@ #include <errno.h> #include <inttypes.h> #include <linux/bpf.h> +#include <pthread.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -21,10 +22,11 @@ #include <unistd.h> #include <arpa/inet.h> #include <sys/time.h> -#include <sys/resource.h> #include <bpf/bpf.h> + #include "bpf_util.h" +#include "bpf_rlimit.h" struct tlpm_node { struct tlpm_node *next; @@ -521,19 +523,225 @@ static void test_lpm_delete(void) close(map_fd); } +static void test_lpm_get_next_key(void) +{ + struct bpf_lpm_trie_key *key_p, *next_key_p; + size_t key_size; + __u32 value = 0; + int map_fd; + + key_size = sizeof(*key_p) + sizeof(__u32); + key_p = alloca(key_size); + next_key_p = alloca(key_size); + + map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value), + 100, BPF_F_NO_PREALLOC); + assert(map_fd >= 0); + + /* empty tree. get_next_key should return ENOENT */ + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 && + errno == ENOENT); + + /* get and verify the first key, get the second one should fail. */ + key_p->prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && + key_p->data[1] == 168); + + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* no exact matching key should get the first one in post order. */ + key_p->prefixlen = 8; + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && + key_p->data[1] == 168); + + /* add one more element (total two) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* Add one more element (total three) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* Add one more element (total four) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 1); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + + /* no exact matching key should return the first one in post order */ + key_p->prefixlen = 22; + inet_pton(AF_INET, "192.168.1.0", key_p->data); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 0); + + close(map_fd); +} + +#define MAX_TEST_KEYS 4 +struct lpm_mt_test_info { + int cmd; /* 0: update, 1: delete, 2: lookup, 3: get_next_key */ + int iter; + int map_fd; + struct { + __u32 prefixlen; + __u32 data; + } key[MAX_TEST_KEYS]; +}; + +static void *lpm_test_command(void *arg) +{ + int i, j, ret, iter, key_size; + struct lpm_mt_test_info *info = arg; + struct bpf_lpm_trie_key *key_p; + + key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32); + key_p = alloca(key_size); + for (iter = 0; iter < info->iter; iter++) + for (i = 0; i < MAX_TEST_KEYS; i++) { + /* first half of iterations in forward order, + * and second half in backward order. + */ + j = (iter < (info->iter / 2)) ? i : MAX_TEST_KEYS - i - 1; + key_p->prefixlen = info->key[j].prefixlen; + memcpy(key_p->data, &info->key[j].data, sizeof(__u32)); + if (info->cmd == 0) { + __u32 value = j; + /* update must succeed */ + assert(bpf_map_update_elem(info->map_fd, key_p, &value, 0) == 0); + } else if (info->cmd == 1) { + ret = bpf_map_delete_elem(info->map_fd, key_p); + assert(ret == 0 || errno == ENOENT); + } else if (info->cmd == 2) { + __u32 value; + ret = bpf_map_lookup_elem(info->map_fd, key_p, &value); + assert(ret == 0 || errno == ENOENT); + } else { + struct bpf_lpm_trie_key *next_key_p = alloca(key_size); + ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p); + assert(ret == 0 || errno == ENOENT || errno == ENOMEM); + } + } + + // Pass successful exit info back to the main thread + pthread_exit((void *)info); +} + +static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd) +{ + info->iter = 2000; + info->map_fd = map_fd; + info->key[0].prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", &info->key[0].data); + info->key[1].prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", &info->key[1].data); + info->key[2].prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", &info->key[2].data); + info->key[3].prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", &info->key[3].data); +} + +static void test_lpm_multi_thread(void) +{ + struct lpm_mt_test_info info[4]; + size_t key_size, value_size; + pthread_t thread_id[4]; + int i, map_fd; + void *ret; + + /* create a trie */ + value_size = sizeof(__u32); + key_size = sizeof(struct bpf_lpm_trie_key) + value_size; + map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size, + 100, BPF_F_NO_PREALLOC); + + /* create 4 threads to test update, delete, lookup and get_next_key */ + setup_lpm_mt_test_info(&info[0], map_fd); + for (i = 0; i < 4; i++) { + if (i != 0) + memcpy(&info[i], &info[0], sizeof(info[i])); + info[i].cmd = i; + assert(pthread_create(&thread_id[i], NULL, &lpm_test_command, &info[i]) == 0); + } + + for (i = 0; i < 4; i++) + assert(pthread_join(thread_id[i], &ret) == 0 && ret == (void *)&info[i]); + + close(map_fd); +} + int main(void) { - struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; - int i, ret; + int i; /* we want predictable, pseudo random tests */ srand(0xf00ba1); - /* allow unlimited locked memory */ - ret = setrlimit(RLIMIT_MEMLOCK, &limit); - if (ret < 0) - perror("Unable to lift memlock rlimit"); - test_lpm_basic(); test_lpm_order(); @@ -542,8 +750,9 @@ int main(void) test_lpm_map(i); test_lpm_ipaddr(); - test_lpm_delete(); + test_lpm_get_next_key(); + test_lpm_multi_thread(); printf("test_lpm: OK\n"); return 0; diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index 8c10c9180c1a..781c7de343be 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -16,10 +16,11 @@ #include <time.h> #include <sys/wait.h> -#include <sys/resource.h> #include <bpf/bpf.h> + #include "bpf_util.h" +#include "bpf_rlimit.h" #define LOCAL_FREE_TARGET (128) #define PERCPU_FREE_TARGET (4) @@ -613,7 +614,6 @@ static void test_lru_sanity6(int map_type, int map_flags, int tgt_free) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; int map_types[] = {BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH}; int map_flags[] = {0, BPF_F_NO_COMMON_LRU}; @@ -621,8 +621,6 @@ int main(int argc, char **argv) setbuf(stdout, NULL); - assert(!setrlimit(RLIMIT_MEMLOCK, &r)); - nr_cpus = bpf_num_possible_cpus(); assert(nr_cpus != -1); printf("nr_cpus:%d\n\n", nr_cpus); diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 040356ecc862..6c253343a6f9 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -17,13 +17,14 @@ #include <stdlib.h> #include <sys/wait.h> -#include <sys/resource.h> #include <linux/bpf.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> + #include "bpf_util.h" +#include "bpf_rlimit.h" static int map_flags; @@ -126,6 +127,8 @@ static void test_hashmap_sizes(int task, void *data) fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j, 2, map_flags); if (fd < 0) { + if (errno == ENOMEM) + return; printf("Failed to create hashmap key=%d value=%d '%s'\n", i, j, strerror(errno)); exit(1); @@ -242,7 +245,7 @@ static void test_hashmap_percpu(int task, void *data) static void test_hashmap_walk(int task, void *data) { - int fd, i, max_entries = 100000; + int fd, i, max_entries = 1000; long long key, value, next_key; bool next_key_valid = true; @@ -461,15 +464,17 @@ static void test_devmap(int task, void *data) #include <linux/err.h> #define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o" #define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o" +#define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.o" static void test_sockmap(int tasks, void *data) { - int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc; - struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break; + struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_msg, *bpf_map_break; + int map_fd_msg = 0, map_fd_rx = 0, map_fd_tx = 0, map_fd_break; int ports[] = {50200, 50201, 50202, 50204}; int err, i, fd, udp, sfd[6] = {0xdeadbeef}; u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0}; - int parse_prog, verdict_prog; + int parse_prog, verdict_prog, msg_prog; struct sockaddr_in addr; + int one = 1, s, sc, rc; struct bpf_object *obj; struct timeval to; __u32 key, value; @@ -581,6 +586,12 @@ static void test_sockmap(int tasks, void *data) goto out_sockmap; } + err = bpf_prog_attach(-1, fd, BPF_SK_MSG_VERDICT, 0); + if (!err) { + printf("Failed invalid msg verdict prog attach\n"); + goto out_sockmap; + } + err = bpf_prog_attach(-1, fd, __MAX_BPF_ATTACH_TYPE, 0); if (!err) { printf("Failed unknown prog attach\n"); @@ -599,6 +610,12 @@ static void test_sockmap(int tasks, void *data) goto out_sockmap; } + err = bpf_prog_detach(fd, BPF_SK_MSG_VERDICT); + if (err) { + printf("Failed empty msg verdict prog detach\n"); + goto out_sockmap; + } + err = bpf_prog_detach(fd, __MAX_BPF_ATTACH_TYPE); if (!err) { printf("Detach invalid prog successful\n"); @@ -613,6 +630,13 @@ static void test_sockmap(int tasks, void *data) goto out_sockmap; } + err = bpf_prog_load(SOCKMAP_TCP_MSG_PROG, + BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog); + if (err) { + printf("Failed to load SK_SKB msg prog\n"); + goto out_sockmap; + } + err = bpf_prog_load(SOCKMAP_VERDICT_PROG, BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog); if (err) { @@ -628,7 +652,7 @@ static void test_sockmap(int tasks, void *data) map_fd_rx = bpf_map__fd(bpf_map_rx); if (map_fd_rx < 0) { - printf("Failed to get map fd\n"); + printf("Failed to get map rx fd\n"); goto out_sockmap; } @@ -644,6 +668,18 @@ static void test_sockmap(int tasks, void *data) goto out_sockmap; } + bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg"); + if (IS_ERR(bpf_map_msg)) { + printf("Failed to load map msg from msg_verdict prog\n"); + goto out_sockmap; + } + + map_fd_msg = bpf_map__fd(bpf_map_msg); + if (map_fd_msg < 0) { + printf("Failed to get map msg fd\n"); + goto out_sockmap; + } + bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break"); if (IS_ERR(bpf_map_break)) { printf("Failed to load map tx from verdict prog\n"); @@ -677,6 +713,12 @@ static void test_sockmap(int tasks, void *data) goto out_sockmap; } + err = bpf_prog_attach(msg_prog, map_fd_msg, BPF_SK_MSG_VERDICT, 0); + if (err) { + printf("Failed msg verdict bpf prog attach\n"); + goto out_sockmap; + } + err = bpf_prog_attach(verdict_prog, map_fd_rx, __MAX_BPF_ATTACH_TYPE, 0); if (!err) { @@ -716,6 +758,14 @@ static void test_sockmap(int tasks, void *data) } } + /* Put sfd[2] (sending fd below) into msg map to test sendmsg bpf */ + i = 0; + err = bpf_map_update_elem(map_fd_msg, &i, &sfd[2], BPF_ANY); + if (err) { + printf("Failed map_fd_msg update sockmap %i\n", err); + goto out_sockmap; + } + /* Test map send/recv */ for (i = 0; i < 2; i++) { buf[0] = i; @@ -868,9 +918,12 @@ static void test_sockmap(int tasks, void *data) goto out_sockmap; } - /* Test map close sockets */ - for (i = 0; i < 6; i++) + /* Test map close sockets and empty maps */ + for (i = 0; i < 6; i++) { + bpf_map_delete_elem(map_fd_tx, &i); + bpf_map_delete_elem(map_fd_rx, &i); close(sfd[i]); + } close(fd); close(map_fd_rx); bpf_object__close(obj); @@ -881,8 +934,13 @@ out: printf("Failed to create sockmap '%i:%s'!\n", i, strerror(errno)); exit(1); out_sockmap: - for (i = 0; i < 6; i++) + for (i = 0; i < 6; i++) { + if (map_fd_tx) + bpf_map_delete_elem(map_fd_tx, &i); + if (map_fd_rx) + bpf_map_delete_elem(map_fd_rx, &i); close(sfd[i]); + } close(fd); exit(1); } @@ -931,8 +989,12 @@ static void test_map_large(void) close(fd); } -static void run_parallel(int tasks, void (*fn)(int task, void *data), - void *data) +#define run_parallel(N, FN, DATA) \ + printf("Fork %d tasks to '" #FN "'\n", N); \ + __run_parallel(N, FN, DATA) + +static void __run_parallel(int tasks, void (*fn)(int task, void *data), + void *data) { pid_t pid[tasks]; int i; @@ -972,7 +1034,7 @@ static void test_map_stress(void) #define DO_UPDATE 1 #define DO_DELETE 0 -static void do_work(int fn, void *data) +static void test_update_delete(int fn, void *data) { int do_update = ((int *)data)[1]; int fd = ((int *)data)[0]; @@ -1012,7 +1074,7 @@ static void test_map_parallel(void) */ data[0] = fd; data[1] = DO_UPDATE; - run_parallel(TASKS, do_work, data); + run_parallel(TASKS, test_update_delete, data); /* Check that key=0 is already there. */ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && @@ -1035,7 +1097,7 @@ static void test_map_parallel(void) /* Now let's delete all elemenets in parallel. */ data[1] = DO_DELETE; - run_parallel(TASKS, do_work, data); + run_parallel(TASKS, test_update_delete, data); /* Nothing should be left. */ key = -1; @@ -1112,10 +1174,6 @@ static void run_all_tests(void) int main(void) { - struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; - - setrlimit(RLIMIT_MEMLOCK, &rinf); - map_flags = 0; run_all_tests(); diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py new file mode 100755 index 000000000000..e78aad0a68bb --- /dev/null +++ b/tools/testing/selftests/bpf/test_offload.py @@ -0,0 +1,1085 @@ +#!/usr/bin/python3 + +# Copyright (C) 2017 Netronome Systems, Inc. +# +# This software is licensed under the GNU General License Version 2, +# June 1991 as shown in the file COPYING in the top-level directory of this +# source tree. +# +# THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE +# OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME +# THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +from datetime import datetime +import argparse +import json +import os +import pprint +import random +import string +import struct +import subprocess +import time + +logfile = None +log_level = 1 +skip_extack = False +bpf_test_dir = os.path.dirname(os.path.realpath(__file__)) +pp = pprint.PrettyPrinter() +devs = [] # devices we created for clean up +files = [] # files to be removed +netns = [] # net namespaces to be removed + +def log_get_sec(level=0): + return "*" * (log_level + level) + +def log_level_inc(add=1): + global log_level + log_level += add + +def log_level_dec(sub=1): + global log_level + log_level -= sub + +def log_level_set(level): + global log_level + log_level = level + +def log(header, data, level=None): + """ + Output to an optional log. + """ + if logfile is None: + return + if level is not None: + log_level_set(level) + + if not isinstance(data, str): + data = pp.pformat(data) + + if len(header): + logfile.write("\n" + log_get_sec() + " ") + logfile.write(header) + if len(header) and len(data.strip()): + logfile.write("\n") + logfile.write(data) + +def skip(cond, msg): + if not cond: + return + print("SKIP: " + msg) + log("SKIP: " + msg, "", level=1) + os.sys.exit(0) + +def fail(cond, msg): + if not cond: + return + print("FAIL: " + msg) + log("FAIL: " + msg, "", level=1) + os.sys.exit(1) + +def start_test(msg): + log(msg, "", level=1) + log_level_inc() + print(msg) + +def cmd(cmd, shell=True, include_stderr=False, background=False, fail=True): + """ + Run a command in subprocess and return tuple of (retval, stdout); + optionally return stderr as well as third value. + """ + proc = subprocess.Popen(cmd, shell=shell, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + if background: + msg = "%s START: %s" % (log_get_sec(1), + datetime.now().strftime("%H:%M:%S.%f")) + log("BKG " + proc.args, msg) + return proc + + return cmd_result(proc, include_stderr=include_stderr, fail=fail) + +def cmd_result(proc, include_stderr=False, fail=False): + stdout, stderr = proc.communicate() + stdout = stdout.decode("utf-8") + stderr = stderr.decode("utf-8") + proc.stdout.close() + proc.stderr.close() + + stderr = "\n" + stderr + if stderr[-1] == "\n": + stderr = stderr[:-1] + + sec = log_get_sec(1) + log("CMD " + proc.args, + "RETCODE: %d\n%s STDOUT:\n%s%s STDERR:%s\n%s END: %s" % + (proc.returncode, sec, stdout, sec, stderr, + sec, datetime.now().strftime("%H:%M:%S.%f"))) + + if proc.returncode != 0 and fail: + if len(stderr) > 0 and stderr[-1] == "\n": + stderr = stderr[:-1] + raise Exception("Command failed: %s\n%s" % (proc.args, stderr)) + + if include_stderr: + return proc.returncode, stdout, stderr + else: + return proc.returncode, stdout + +def rm(f): + cmd("rm -f %s" % (f)) + if f in files: + files.remove(f) + +def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False): + params = "" + if JSON: + params += "%s " % (flags["json"]) + + if ns != "": + ns = "ip netns exec %s " % (ns) + + if include_stderr: + ret, stdout, stderr = cmd(ns + name + " " + params + args, + fail=fail, include_stderr=True) + else: + ret, stdout = cmd(ns + name + " " + params + args, + fail=fail, include_stderr=False) + + if JSON and len(stdout.strip()) != 0: + out = json.loads(stdout) + else: + out = stdout + + if include_stderr: + return ret, out, stderr + else: + return ret, out + +def bpftool(args, JSON=True, ns="", fail=True): + return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail) + +def bpftool_prog_list(expected=None, ns=""): + _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True) + if expected is not None: + if len(progs) != expected: + fail(True, "%d BPF programs loaded, expected %d" % + (len(progs), expected)) + return progs + +def bpftool_map_list(expected=None, ns=""): + _, maps = bpftool("map show", JSON=True, ns=ns, fail=True) + if expected is not None: + if len(maps) != expected: + fail(True, "%d BPF maps loaded, expected %d" % + (len(maps), expected)) + return maps + +def bpftool_prog_list_wait(expected=0, n_retry=20): + for i in range(n_retry): + nprogs = len(bpftool_prog_list()) + if nprogs == expected: + return + time.sleep(0.05) + raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) + +def bpftool_map_list_wait(expected=0, n_retry=20): + for i in range(n_retry): + nmaps = len(bpftool_map_list()) + if nmaps == expected: + return + time.sleep(0.05) + raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) + +def ip(args, force=False, JSON=True, ns="", fail=True, include_stderr=False): + if force: + args = "-force " + args + return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, + fail=fail, include_stderr=include_stderr) + +def tc(args, JSON=True, ns="", fail=True, include_stderr=False): + return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, + fail=fail, include_stderr=include_stderr) + +def ethtool(dev, opt, args, fail=True): + return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail) + +def bpf_obj(name, sec=".text", path=bpf_test_dir,): + return "obj %s sec %s" % (os.path.join(path, name), sec) + +def bpf_pinned(name): + return "pinned %s" % (name) + +def bpf_bytecode(bytecode): + return "bytecode \"%s\"" % (bytecode) + +def mknetns(n_retry=10): + for i in range(n_retry): + name = ''.join([random.choice(string.ascii_letters) for i in range(8)]) + ret, _ = ip("netns add %s" % (name), fail=False) + if ret == 0: + netns.append(name) + return name + return None + +def int2str(fmt, val): + ret = [] + for b in struct.pack(fmt, val): + ret.append(int(b)) + return " ".join(map(lambda x: str(x), ret)) + +def str2int(strtab): + inttab = [] + for i in strtab: + inttab.append(int(i, 16)) + ba = bytearray(inttab) + if len(strtab) == 4: + fmt = "I" + elif len(strtab) == 8: + fmt = "Q" + else: + raise Exception("String array of len %d can't be unpacked to an int" % + (len(strtab))) + return struct.unpack(fmt, ba)[0] + +class DebugfsDir: + """ + Class for accessing DebugFS directories as a dictionary. + """ + + def __init__(self, path): + self.path = path + self._dict = self._debugfs_dir_read(path) + + def __len__(self): + return len(self._dict.keys()) + + def __getitem__(self, key): + if type(key) is int: + key = list(self._dict.keys())[key] + return self._dict[key] + + def __setitem__(self, key, value): + log("DebugFS set %s = %s" % (key, value), "") + log_level_inc() + + cmd("echo '%s' > %s/%s" % (value, self.path, key)) + log_level_dec() + + _, out = cmd('cat %s/%s' % (self.path, key)) + self._dict[key] = out.strip() + + def _debugfs_dir_read(self, path): + dfs = {} + + log("DebugFS state for %s" % (path), "") + log_level_inc(add=2) + + _, out = cmd('ls ' + path) + for f in out.split(): + p = os.path.join(path, f) + if os.path.isfile(p): + _, out = cmd('cat %s/%s' % (path, f)) + dfs[f] = out.strip() + elif os.path.isdir(p): + dfs[f] = DebugfsDir(p) + else: + raise Exception("%s is neither file nor directory" % (p)) + + log_level_dec() + log("DebugFS state", dfs) + log_level_dec() + + return dfs + +class NetdevSim: + """ + Class for netdevsim netdevice and its attributes. + """ + + def __init__(self): + self.dev = self._netdevsim_create() + devs.append(self) + + self.ns = "" + + self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname']) + self.dfs_refresh() + + def __getitem__(self, key): + return self.dev[key] + + def _netdevsim_create(self): + _, old = ip("link show") + ip("link add sim%d type netdevsim") + _, new = ip("link show") + + for dev in new: + f = filter(lambda x: x["ifname"] == dev["ifname"], old) + if len(list(f)) == 0: + return dev + + raise Exception("failed to create netdevsim device") + + def remove(self): + devs.remove(self) + ip("link del dev %s" % (self.dev["ifname"]), ns=self.ns) + + def dfs_refresh(self): + self.dfs = DebugfsDir(self.dfs_dir) + return self.dfs + + def dfs_num_bound_progs(self): + path = os.path.join(self.dfs_dir, "bpf_bound_progs") + _, progs = cmd('ls %s' % (path)) + return len(progs.split()) + + def dfs_get_bound_progs(self, expected): + progs = DebugfsDir(os.path.join(self.dfs_dir, "bpf_bound_progs")) + if expected is not None: + if len(progs) != expected: + fail(True, "%d BPF programs bound, expected %d" % + (len(progs), expected)) + return progs + + def wait_for_flush(self, bound=0, total=0, n_retry=20): + for i in range(n_retry): + nbound = self.dfs_num_bound_progs() + nprogs = len(bpftool_prog_list()) + if nbound == bound and nprogs == total: + return + time.sleep(0.05) + raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs)) + + def set_ns(self, ns): + name = "1" if ns == "" else ns + ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns) + self.ns = ns + + def set_mtu(self, mtu, fail=True): + return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu), + fail=fail) + + def set_xdp(self, bpf, mode, force=False, JSON=True, verbose=False, + fail=True, include_stderr=False): + if verbose: + bpf += " verbose" + return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf), + force=force, JSON=JSON, + fail=fail, include_stderr=include_stderr) + + def unset_xdp(self, mode, force=False, JSON=True, + fail=True, include_stderr=False): + return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode), + force=force, JSON=JSON, + fail=fail, include_stderr=include_stderr) + + def ip_link_show(self, xdp): + _, link = ip("link show dev %s" % (self['ifname'])) + if len(link) > 1: + raise Exception("Multiple objects on ip link show") + if len(link) < 1: + return {} + fail(xdp != "xdp" in link, + "XDP program not reporting in iplink (reported %s, expected %s)" % + ("xdp" in link, xdp)) + return link[0] + + def tc_add_ingress(self): + tc("qdisc add dev %s ingress" % (self['ifname'])) + + def tc_del_ingress(self): + tc("qdisc del dev %s ingress" % (self['ifname'])) + + def tc_flush_filters(self, bound=0, total=0): + self.tc_del_ingress() + self.tc_add_ingress() + self.wait_for_flush(bound=bound, total=total) + + def tc_show_ingress(self, expected=None): + # No JSON support, oh well... + flags = ["skip_sw", "skip_hw", "in_hw"] + named = ["protocol", "pref", "chain", "handle", "id", "tag"] + + args = "-s filter show dev %s ingress" % (self['ifname']) + _, out = tc(args, JSON=False) + + filters = [] + lines = out.split('\n') + for line in lines: + words = line.split() + if "handle" not in words: + continue + fltr = {} + for flag in flags: + fltr[flag] = flag in words + for name in named: + try: + idx = words.index(name) + fltr[name] = words[idx + 1] + except ValueError: + pass + filters.append(fltr) + + if expected is not None: + fail(len(filters) != expected, + "%d ingress filters loaded, expected %d" % + (len(filters), expected)) + return filters + + def cls_filter_op(self, op, qdisc="ingress", prio=None, handle=None, + chain=None, cls="", params="", + fail=True, include_stderr=False): + spec = "" + if prio is not None: + spec += " prio %d" % (prio) + if handle: + spec += " handle %s" % (handle) + if chain is not None: + spec += " chain %d" % (chain) + + return tc("filter {op} dev {dev} {qdisc} {spec} {cls} {params}"\ + .format(op=op, dev=self['ifname'], qdisc=qdisc, spec=spec, + cls=cls, params=params), + fail=fail, include_stderr=include_stderr) + + def cls_bpf_add_filter(self, bpf, op="add", prio=None, handle=None, + chain=None, da=False, verbose=False, + skip_sw=False, skip_hw=False, + fail=True, include_stderr=False): + cls = "bpf " + bpf + + params = "" + if da: + params += " da" + if verbose: + params += " verbose" + if skip_sw: + params += " skip_sw" + if skip_hw: + params += " skip_hw" + + return self.cls_filter_op(op=op, prio=prio, handle=handle, cls=cls, + chain=chain, params=params, + fail=fail, include_stderr=include_stderr) + + def set_ethtool_tc_offloads(self, enable, fail=True): + args = "hw-tc-offload %s" % ("on" if enable else "off") + return ethtool(self, "-K", args, fail=fail) + +################################################################################ +def clean_up(): + global files, netns, devs + + for dev in devs: + dev.remove() + for f in files: + cmd("rm -f %s" % (f)) + for ns in netns: + cmd("ip netns delete %s" % (ns)) + files = [] + netns = [] + +def pin_prog(file_name, idx=0): + progs = bpftool_prog_list(expected=(idx + 1)) + prog = progs[idx] + bpftool("prog pin id %d %s" % (prog["id"], file_name)) + files.append(file_name) + + return file_name, bpf_pinned(file_name) + +def pin_map(file_name, idx=0, expected=1): + maps = bpftool_map_list(expected=expected) + m = maps[idx] + bpftool("map pin id %d %s" % (m["id"], file_name)) + files.append(file_name) + + return file_name, bpf_pinned(file_name) + +def check_dev_info_removed(prog_file=None, map_file=None): + bpftool_prog_list(expected=0) + ret, err = bpftool("prog show pin %s" % (prog_file), fail=False) + fail(ret == 0, "Showing prog with removed device did not fail") + fail(err["error"].find("No such device") == -1, + "Showing prog with removed device expected ENODEV, error is %s" % + (err["error"])) + + bpftool_map_list(expected=0) + ret, err = bpftool("map show pin %s" % (map_file), fail=False) + fail(ret == 0, "Showing map with removed device did not fail") + fail(err["error"].find("No such device") == -1, + "Showing map with removed device expected ENODEV, error is %s" % + (err["error"])) + +def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False): + progs = bpftool_prog_list(expected=1, ns=ns) + prog = progs[0] + + fail("dev" not in prog.keys(), "Device parameters not reported") + dev = prog["dev"] + fail("ifindex" not in dev.keys(), "Device parameters not reported") + fail("ns_dev" not in dev.keys(), "Device parameters not reported") + fail("ns_inode" not in dev.keys(), "Device parameters not reported") + + if not other_ns: + fail("ifname" not in dev.keys(), "Ifname not reported") + fail(dev["ifname"] != sim["ifname"], + "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"])) + else: + fail("ifname" in dev.keys(), "Ifname is reported for other ns") + + maps = bpftool_map_list(expected=2, ns=ns) + for m in maps: + fail("dev" not in m.keys(), "Device parameters not reported") + fail(dev != m["dev"], "Map's device different than program's") + +def check_extack(output, reference, args): + if skip_extack: + return + lines = output.split("\n") + comp = len(lines) >= 2 and lines[1] == reference + fail(not comp, "Missing or incorrect netlink extack message") + +def check_extack_nsim(output, reference, args): + check_extack(output, "Error: netdevsim: " + reference, args) + +def check_no_extack(res, needle): + fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"), + "Found '%s' in command output, leaky extack?" % (needle)) + +def check_verifier_log(output, reference): + lines = output.split("\n") + for l in reversed(lines): + if l == reference: + return + fail(True, "Missing or incorrect message from netdevsim in verifier log") + +def test_spurios_extack(sim, obj, skip_hw, needle): + res = sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=skip_hw, + include_stderr=True) + check_no_extack(res, needle) + res = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, + skip_hw=skip_hw, include_stderr=True) + check_no_extack(res, needle) + res = sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf", + include_stderr=True) + check_no_extack(res, needle) + + +# Parse command line +parser = argparse.ArgumentParser() +parser.add_argument("--log", help="output verbose log to given file") +args = parser.parse_args() +if args.log: + logfile = open(args.log, 'w+') + logfile.write("# -*-Org-*-") + +log("Prepare...", "", level=1) +log_level_inc() + +# Check permissions +skip(os.getuid() != 0, "test must be run as root") + +# Check tools +ret, progs = bpftool("prog", fail=False) +skip(ret != 0, "bpftool not installed") +# Check no BPF programs are loaded +skip(len(progs) != 0, "BPF programs already loaded on the system") + +# Check netdevsim +ret, out = cmd("modprobe netdevsim", fail=False) +skip(ret != 0, "netdevsim module could not be loaded") + +# Check debugfs +_, out = cmd("mount") +if out.find("/sys/kernel/debug type debugfs") == -1: + cmd("mount -t debugfs none /sys/kernel/debug") + +# Check samples are compiled +samples = ["sample_ret0.o", "sample_map_ret0.o"] +for s in samples: + ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False) + skip(ret != 0, "sample %s/%s not found, please compile it" % + (bpf_test_dir, s)) + +# Check if iproute2 is built with libmnl (needed by extack support) +_, _, err = cmd("tc qdisc delete dev lo handle 0", + fail=False, include_stderr=True) +if err.find("Error: Failed to find qdisc with specified handle.") == -1: + print("Warning: no extack message in iproute2 output, libmnl missing?") + log("Warning: no extack message in iproute2 output, libmnl missing?", "") + skip_extack = True + +# Check if net namespaces seem to work +ns = mknetns() +skip(ns is None, "Could not create a net namespace") +cmd("ip netns delete %s" % (ns)) +netns = [] + +try: + obj = bpf_obj("sample_ret0.o") + bytecode = bpf_bytecode("1,6 0 0 4294967295,") + + start_test("Test destruction of generic XDP...") + sim = NetdevSim() + sim.set_xdp(obj, "generic") + sim.remove() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + sim.tc_add_ingress() + + start_test("Test TC non-offloaded...") + ret, _ = sim.cls_bpf_add_filter(obj, skip_hw=True, fail=False) + fail(ret != 0, "Software TC filter did not load") + + start_test("Test TC non-offloaded isn't getting bound...") + ret, _ = sim.cls_bpf_add_filter(obj, fail=False) + fail(ret != 0, "Software TC filter did not load") + sim.dfs_get_bound_progs(expected=0) + + sim.tc_flush_filters() + + start_test("Test TC offloads are off by default...") + ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "TC filter loaded without enabling TC offloads") + check_extack(err, "Error: TC offload is disabled on net device.", args) + sim.wait_for_flush() + + sim.set_ethtool_tc_offloads(True) + sim.dfs["bpf_tc_non_bound_accept"] = "Y" + + start_test("Test TC offload by default...") + ret, _ = sim.cls_bpf_add_filter(obj, fail=False) + fail(ret != 0, "Software TC filter did not load") + sim.dfs_get_bound_progs(expected=0) + ingress = sim.tc_show_ingress(expected=1) + fltr = ingress[0] + fail(not fltr["in_hw"], "Filter not offloaded by default") + + sim.tc_flush_filters() + + start_test("Test TC cBPF bytcode tries offload by default...") + ret, _ = sim.cls_bpf_add_filter(bytecode, fail=False) + fail(ret != 0, "Software TC filter did not load") + sim.dfs_get_bound_progs(expected=0) + ingress = sim.tc_show_ingress(expected=1) + fltr = ingress[0] + fail(not fltr["in_hw"], "Bytecode not offloaded by default") + + sim.tc_flush_filters() + sim.dfs["bpf_tc_non_bound_accept"] = "N" + + start_test("Test TC cBPF unbound bytecode doesn't offload...") + ret, _, err = sim.cls_bpf_add_filter(bytecode, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "TC bytecode loaded for offload") + check_extack_nsim(err, "netdevsim configured to reject unbound programs.", + args) + sim.wait_for_flush() + + start_test("Test non-0 chain offload...") + ret, _, err = sim.cls_bpf_add_filter(obj, chain=1, prio=1, handle=1, + skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "Offloaded a filter to chain other than 0") + check_extack(err, "Error: Driver supports only offload of chain 0.", args) + sim.tc_flush_filters() + + start_test("Test TC replace...") + sim.cls_bpf_add_filter(obj, prio=1, handle=1) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_sw=True) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_sw=True) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=True) + sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_hw=True) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + start_test("Test TC replace bad flags...") + for i in range(3): + for j in range(3): + ret, _ = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, + skip_sw=(j == 1), skip_hw=(j == 2), + fail=False) + fail(bool(ret) != bool(j), + "Software TC incorrect load in replace test, iteration %d" % + (j)) + sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf") + + start_test("Test spurious extack from the driver...") + test_spurios_extack(sim, obj, False, "netdevsim") + test_spurios_extack(sim, obj, True, "netdevsim") + + sim.set_ethtool_tc_offloads(False) + + test_spurios_extack(sim, obj, False, "TC offload is disabled") + test_spurios_extack(sim, obj, True, "TC offload is disabled") + + sim.set_ethtool_tc_offloads(True) + + sim.tc_flush_filters() + + start_test("Test TC offloads work...") + ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True, + fail=False, include_stderr=True) + fail(ret != 0, "TC filter did not load with TC offloads enabled") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") + + start_test("Test TC offload basics...") + dfs = sim.dfs_get_bound_progs(expected=1) + progs = bpftool_prog_list(expected=1) + ingress = sim.tc_show_ingress(expected=1) + + dprog = dfs[0] + prog = progs[0] + fltr = ingress[0] + fail(fltr["skip_hw"], "TC does reports 'skip_hw' on offloaded filter") + fail(not fltr["in_hw"], "TC does not report 'in_hw' for offloaded filter") + fail(not fltr["skip_sw"], "TC does not report 'skip_sw' back") + + start_test("Test TC offload is device-bound...") + fail(str(prog["id"]) != fltr["id"], "Program IDs don't match") + fail(prog["tag"] != fltr["tag"], "Program tags don't match") + fail(fltr["id"] != dprog["id"], "Program IDs don't match") + fail(dprog["state"] != "xlated", "Offloaded program state not translated") + fail(dprog["loaded"] != "Y", "Offloaded program is not loaded") + + start_test("Test disabling TC offloads is rejected while filters installed...") + ret, _ = sim.set_ethtool_tc_offloads(False, fail=False) + fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...") + + start_test("Test qdisc removal frees things...") + sim.tc_flush_filters() + sim.tc_show_ingress(expected=0) + + start_test("Test disabling TC offloads is OK without filters...") + ret, _ = sim.set_ethtool_tc_offloads(False, fail=False) + fail(ret != 0, + "Driver refused to disable TC offloads without filters installed...") + + sim.set_ethtool_tc_offloads(True) + + start_test("Test destroying device gets rid of TC filters...") + sim.cls_bpf_add_filter(obj, skip_sw=True) + sim.remove() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + sim.set_ethtool_tc_offloads(True) + + start_test("Test destroying device gets rid of XDP...") + sim.set_xdp(obj, "offload") + sim.remove() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + sim.set_ethtool_tc_offloads(True) + + start_test("Test XDP prog reporting...") + sim.set_xdp(obj, "drv") + ipl = sim.ip_link_show(xdp=True) + progs = bpftool_prog_list(expected=1) + fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"], + "Loaded program has wrong ID") + + start_test("Test XDP prog replace without force...") + ret, _ = sim.set_xdp(obj, "drv", fail=False) + fail(ret == 0, "Replaced XDP program without -force") + sim.wait_for_flush(total=1) + + start_test("Test XDP prog replace with force...") + ret, _ = sim.set_xdp(obj, "drv", force=True, fail=False) + fail(ret != 0, "Could not replace XDP program with -force") + bpftool_prog_list_wait(expected=1) + ipl = sim.ip_link_show(xdp=True) + progs = bpftool_prog_list(expected=1) + fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"], + "Loaded program has wrong ID") + fail("dev" in progs[0].keys(), + "Device parameters reported for non-offloaded program") + + start_test("Test XDP prog replace with bad flags...") + ret, _, err = sim.set_xdp(obj, "offload", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Replaced XDP program with a program in different mode") + check_extack_nsim(err, "program loaded with different flags.", args) + ret, _, err = sim.set_xdp(obj, "", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Replaced XDP program with a program in different mode") + check_extack_nsim(err, "program loaded with different flags.", args) + + start_test("Test XDP prog remove with bad flags...") + ret, _, err = sim.unset_xdp("offload", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Removed program with a bad mode mode") + check_extack_nsim(err, "program loaded with different flags.", args) + ret, _, err = sim.unset_xdp("", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Removed program with a bad mode mode") + check_extack_nsim(err, "program loaded with different flags.", args) + + start_test("Test MTU restrictions...") + ret, _ = sim.set_mtu(9000, fail=False) + fail(ret == 0, + "Driver should refuse to increase MTU to 9000 with XDP loaded...") + sim.unset_xdp("drv") + bpftool_prog_list_wait(expected=0) + sim.set_mtu(9000) + ret, _, err = sim.set_xdp(obj, "drv", fail=False, include_stderr=True) + fail(ret == 0, "Driver should refuse to load program with MTU of 9000...") + check_extack_nsim(err, "MTU too large w/ XDP enabled.", args) + sim.set_mtu(1500) + + sim.wait_for_flush() + start_test("Test XDP offload...") + _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True) + ipl = sim.ip_link_show(xdp=True) + link_xdp = ipl["xdp"]["prog"] + progs = bpftool_prog_list(expected=1) + prog = progs[0] + fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") + + start_test("Test XDP offload is device bound...") + dfs = sim.dfs_get_bound_progs(expected=1) + dprog = dfs[0] + + fail(prog["id"] != link_xdp["id"], "Program IDs don't match") + fail(prog["tag"] != link_xdp["tag"], "Program tags don't match") + fail(str(link_xdp["id"]) != dprog["id"], "Program IDs don't match") + fail(dprog["state"] != "xlated", "Offloaded program state not translated") + fail(dprog["loaded"] != "Y", "Offloaded program is not loaded") + + start_test("Test removing XDP program many times...") + sim.unset_xdp("offload") + sim.unset_xdp("offload") + sim.unset_xdp("drv") + sim.unset_xdp("drv") + sim.unset_xdp("") + sim.unset_xdp("") + bpftool_prog_list_wait(expected=0) + + start_test("Test attempt to use a program for a wrong device...") + sim2 = NetdevSim() + sim2.set_xdp(obj, "offload") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") + + ret, _, err = sim.set_xdp(pinned, "offload", + fail=False, include_stderr=True) + fail(ret == 0, "Pinned program loaded for a different device accepted") + check_extack_nsim(err, "program bound to different dev.", args) + sim2.remove() + ret, _, err = sim.set_xdp(pinned, "offload", + fail=False, include_stderr=True) + fail(ret == 0, "Pinned program loaded for a removed device accepted") + check_extack_nsim(err, "xdpoffload of non-bound program.", args) + rm(pin_file) + bpftool_prog_list_wait(expected=0) + + start_test("Test mixing of TC and XDP...") + sim.tc_add_ingress() + sim.set_xdp(obj, "offload") + ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "Loading TC when XDP active should fail") + check_extack_nsim(err, "driver and netdev offload states mismatch.", args) + sim.unset_xdp("offload") + sim.wait_for_flush() + + sim.cls_bpf_add_filter(obj, skip_sw=True) + ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True) + fail(ret == 0, "Loading XDP when TC active should fail") + check_extack_nsim(err, "TC program is already loaded.", args) + + start_test("Test binding TC from pinned...") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") + sim.tc_flush_filters(bound=1, total=1) + sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True) + sim.tc_flush_filters(bound=1, total=1) + + start_test("Test binding XDP from pinned...") + sim.set_xdp(obj, "offload") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp2", idx=1) + + sim.set_xdp(pinned, "offload", force=True) + sim.unset_xdp("offload") + sim.set_xdp(pinned, "offload", force=True) + sim.unset_xdp("offload") + + start_test("Test offload of wrong type fails...") + ret, _ = sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True, fail=False) + fail(ret == 0, "Managed to attach XDP program to TC") + + start_test("Test asking for TC offload of two filters...") + sim.cls_bpf_add_filter(obj, da=True, skip_sw=True) + ret, _, err = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "Managed to offload two TC filters at the same time") + check_extack_nsim(err, "driver and netdev offload states mismatch.", args) + + sim.tc_flush_filters(bound=2, total=2) + + start_test("Test if netdev removal waits for translation...") + delay_msec = 500 + sim.dfs["bpf_bind_verifier_delay"] = delay_msec + start = time.time() + cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \ + (sim['ifname'], obj) + tc_proc = cmd(cmd_line, background=True, fail=False) + # Wait for the verifier to start + while sim.dfs_num_bound_progs() <= 2: + pass + sim.remove() + end = time.time() + ret, _ = cmd_result(tc_proc, fail=False) + time_diff = end - start + log("Time", "start:\t%s\nend:\t%s\ndiff:\t%s" % (start, end, time_diff)) + + fail(ret == 0, "Managed to load TC filter on a unregistering device") + delay_sec = delay_msec * 0.001 + fail(time_diff < delay_sec, "Removal process took %s, expected %s" % + (time_diff, delay_sec)) + + # Remove all pinned files and reinstantiate the netdev + clean_up() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + map_obj = bpf_obj("sample_map_ret0.o") + start_test("Test loading program with maps...") + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + + start_test("Test bpftool bound info reporting (own ns)...") + check_dev_info(False, "") + + start_test("Test bpftool bound info reporting (other ns)...") + ns = mknetns() + sim.set_ns(ns) + check_dev_info(True, "") + + start_test("Test bpftool bound info reporting (remote ns)...") + check_dev_info(False, ns) + + start_test("Test bpftool bound info reporting (back to own ns)...") + sim.set_ns("") + check_dev_info(False, "") + + prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog") + map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2) + sim.remove() + + start_test("Test bpftool bound info reporting (removed dev)...") + check_dev_info_removed(prog_file=prog_file, map_file=map_file) + + # Remove all pinned files and reinstantiate the netdev + clean_up() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + + start_test("Test map update (no flags)...") + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + maps = bpftool_map_list(expected=2) + array = maps[0] if maps[0]["type"] == "array" else maps[1] + htab = maps[0] if maps[0]["type"] == "hash" else maps[1] + for m in maps: + for i in range(2): + bpftool("map update id %d key %s value %s" % + (m["id"], int2str("I", i), int2str("Q", i * 3))) + + for m in maps: + ret, _ = bpftool("map update id %d key %s value %s" % + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), + fail=False) + fail(ret == 0, "added too many entries") + + start_test("Test map update (exists)...") + for m in maps: + for i in range(2): + bpftool("map update id %d key %s value %s exist" % + (m["id"], int2str("I", i), int2str("Q", i * 3))) + + for m in maps: + ret, err = bpftool("map update id %d key %s value %s exist" % + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), + fail=False) + fail(ret == 0, "updated non-existing key") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map update (noexist)...") + for m in maps: + for i in range(2): + ret, err = bpftool("map update id %d key %s value %s noexist" % + (m["id"], int2str("I", i), int2str("Q", i * 3)), + fail=False) + fail(ret == 0, "updated existing key") + fail(err["error"].find("File exists") == -1, + "expected EEXIST, error is '%s'" % (err["error"])) + + start_test("Test map dump...") + for m in maps: + _, entries = bpftool("map dump id %d" % (m["id"])) + for i in range(2): + key = str2int(entries[i]["key"]) + fail(key != i, "expected key %d, got %d" % (key, i)) + val = str2int(entries[i]["value"]) + fail(val != i * 3, "expected value %d, got %d" % (val, i * 3)) + + start_test("Test map getnext...") + for m in maps: + _, entry = bpftool("map getnext id %d" % (m["id"])) + key = str2int(entry["next_key"]) + fail(key != 0, "next key %d, expected %d" % (key, 0)) + _, entry = bpftool("map getnext id %d key %s" % + (m["id"], int2str("I", 0))) + key = str2int(entry["next_key"]) + fail(key != 1, "next key %d, expected %d" % (key, 1)) + ret, err = bpftool("map getnext id %d key %s" % + (m["id"], int2str("I", 1)), fail=False) + fail(ret == 0, "got next key past the end of map") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map delete (htab)...") + for i in range(2): + bpftool("map delete id %d key %s" % (htab["id"], int2str("I", i))) + + start_test("Test map delete (array)...") + for i in range(2): + ret, err = bpftool("map delete id %d key %s" % + (htab["id"], int2str("I", i)), fail=False) + fail(ret == 0, "removed entry from an array") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map remove...") + sim.unset_xdp("offload") + bpftool_map_list_wait(expected=0) + sim.remove() + + sim = NetdevSim() + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + sim.remove() + bpftool_map_list_wait(expected=0) + + start_test("Test map creation fail path...") + sim = NetdevSim() + sim.dfs["bpf_map_accept"] = "N" + ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False) + fail(ret == 0, + "netdevsim didn't refuse to create a map with offload disabled") + + print("%s: OK" % (os.path.basename(__file__))) + +finally: + log("Clean up...", "", level=1) + log_level_inc() + clean_up() diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 6761be18a91f..faadbe233966 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -21,10 +21,11 @@ typedef __u16 __sum16; #include <linux/ipv6.h> #include <linux/tcp.h> #include <linux/filter.h> +#include <linux/perf_event.h> #include <linux/unistd.h> +#include <sys/ioctl.h> #include <sys/wait.h> -#include <sys/resource.h> #include <sys/types.h> #include <fcntl.h> @@ -32,9 +33,11 @@ typedef __u16 __sum16; #include <linux/err.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> + #include "test_iptunnel_common.h" #include "bpf_util.h" #include "bpf_endian.h" +#include "bpf_rlimit.h" static int error_cnt, pass_cnt; @@ -167,10 +170,9 @@ out: #define NUM_ITER 100000 #define VIP_NUM 5 -static void test_l4lb(void) +static void test_l4lb(const char *file) { unsigned int nr_cpus = bpf_num_possible_cpus(); - const char *file = "./test_l4lb.o"; struct vip key = {.protocol = 6}; struct vip_meta { __u32 flags; @@ -247,6 +249,95 @@ out: bpf_object__close(obj); } +static void test_l4lb_all(void) +{ + const char *file1 = "./test_l4lb.o"; + const char *file2 = "./test_l4lb_noinline.o"; + + test_l4lb(file1); + test_l4lb(file2); +} + +static void test_xdp_noinline(void) +{ + const char *file = "./test_xdp_noinline.o"; + unsigned int nr_cpus = bpf_num_possible_cpus(); + struct vip key = {.protocol = 6}; + struct vip_meta { + __u32 flags; + __u32 vip_num; + } value = {.vip_num = VIP_NUM}; + __u32 stats_key = VIP_NUM; + struct vip_stats { + __u64 bytes; + __u64 pkts; + } stats[nr_cpus]; + struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; + } real_def = {.dst = MAGIC_VAL}; + __u32 ch_key = 11, real_num = 3; + __u32 duration, retval, size; + int err, i, prog_fd, map_fd; + __u64 bytes = 0, pkts = 0; + struct bpf_object *obj; + char buf[128]; + u32 *magic = (u32 *)buf; + + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (err) { + error_cnt++; + return; + } + + map_fd = bpf_find_map(__func__, obj, "vip_map"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &key, &value, 0); + + map_fd = bpf_find_map(__func__, obj, "ch_rings"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &ch_key, &real_num, 0); + + map_fd = bpf_find_map(__func__, obj, "reals"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &real_num, &real_def, 0); + + err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != 1 || size != 54 || + *magic != MAGIC_VAL, "ipv4", + "err %d errno %d retval %d size %d magic %x\n", + err, errno, retval, size, *magic); + + err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != 1 || size != 74 || + *magic != MAGIC_VAL, "ipv6", + "err %d errno %d retval %d size %d magic %x\n", + err, errno, retval, size, *magic); + + map_fd = bpf_find_map(__func__, obj, "stats"); + if (map_fd < 0) + goto out; + bpf_map_lookup_elem(map_fd, &stats_key, stats); + for (i = 0; i < nr_cpus; i++) { + bytes += stats[i].bytes; + pkts += stats[i].pkts; + } + if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) { + error_cnt++; + printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts); + } +out: + bpf_object__close(obj); +} + static void test_tcp_estats(void) { const char *file = "./test_tcp_estats.o"; @@ -617,19 +708,485 @@ static void test_obj_name(void) } } -int main(void) +static void test_tp_attach_query(void) +{ + const int num_progs = 3; + int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs]; + __u32 duration = 0, info_len, saved_prog_ids[num_progs]; + const char *file = "./test_tracepoint.o"; + struct perf_event_query_bpf *query; + struct perf_event_attr attr = {}; + struct bpf_object *obj[num_progs]; + struct bpf_prog_info prog_info; + char buf[256]; + + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/sched/sched_switch/id"); + efd = open(buf, O_RDONLY, 0); + if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) + return; + bytes = read(efd, buf, sizeof(buf)); + close(efd); + if (CHECK(bytes <= 0 || bytes >= sizeof(buf), + "read", "bytes %d errno %d\n", bytes, errno)) + return; + + attr.config = strtol(buf, NULL, 0); + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN; + attr.sample_period = 1; + attr.wakeup_events = 1; + + query = malloc(sizeof(*query) + sizeof(__u32) * num_progs); + for (i = 0; i < num_progs; i++) { + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i], + &prog_fd[i]); + if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + goto cleanup1; + + bzero(&prog_info, sizeof(prog_info)); + prog_info.jited_prog_len = 0; + prog_info.xlated_prog_len = 0; + prog_info.nr_map_ids = 0; + info_len = sizeof(prog_info); + err = bpf_obj_get_info_by_fd(prog_fd[i], &prog_info, &info_len); + if (CHECK(err, "bpf_obj_get_info_by_fd", "err %d errno %d\n", + err, errno)) + goto cleanup1; + saved_prog_ids[i] = prog_info.id; + + pmu_fd[i] = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + if (CHECK(pmu_fd[i] < 0, "perf_event_open", "err %d errno %d\n", + pmu_fd[i], errno)) + goto cleanup2; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0); + if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", + err, errno)) + goto cleanup3; + + if (i == 0) { + /* check NULL prog array query */ + query->ids_len = num_progs; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(err || query->prog_cnt != 0, + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + } + + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[i]); + if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", + err, errno)) + goto cleanup3; + + if (i == 1) { + /* try to get # of programs only */ + query->ids_len = 0; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(err || query->prog_cnt != 2, + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + + /* try a few negative tests */ + /* invalid query pointer */ + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, + (struct perf_event_query_bpf *)0x1); + if (CHECK(!err || errno != EFAULT, + "perf_event_ioc_query_bpf", + "err %d errno %d\n", err, errno)) + goto cleanup3; + + /* no enough space */ + query->ids_len = 1; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(!err || errno != ENOSPC || query->prog_cnt != 2, + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + } + + query->ids_len = num_progs; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(err || query->prog_cnt != (i + 1), + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + for (j = 0; j < i + 1; j++) + if (CHECK(saved_prog_ids[j] != query->ids[j], + "perf_event_ioc_query_bpf", + "#%d saved_prog_id %x query prog_id %x\n", + j, saved_prog_ids[j], query->ids[j])) + goto cleanup3; + } + + i = num_progs - 1; + for (; i >= 0; i--) { + cleanup3: + ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE); + cleanup2: + close(pmu_fd[i]); + cleanup1: + bpf_object__close(obj[i]); + } + free(query); +} + +static int compare_map_keys(int map1_fd, int map2_fd) +{ + __u32 key, next_key; + char val_buf[PERF_MAX_STACK_DEPTH * + sizeof(struct bpf_stack_build_id)]; + int err; + + err = bpf_map_get_next_key(map1_fd, NULL, &key); + if (err) + return err; + err = bpf_map_lookup_elem(map2_fd, &key, val_buf); + if (err) + return err; + + while (bpf_map_get_next_key(map1_fd, &key, &next_key) == 0) { + err = bpf_map_lookup_elem(map2_fd, &next_key, val_buf); + if (err) + return err; + + key = next_key; + } + if (errno != ENOENT) + return -1; + + return 0; +} + +static void test_stacktrace_map() +{ + int control_map_fd, stackid_hmap_fd, stackmap_fd; + const char *file = "./test_stacktrace_map.o"; + int bytes, efd, err, pmu_fd, prog_fd; + struct perf_event_attr attr = {}; + __u32 key, val, duration = 0; + struct bpf_object *obj; + char buf[256]; + + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + return; + + /* Get the ID for the sched/sched_switch tracepoint */ + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/sched/sched_switch/id"); + efd = open(buf, O_RDONLY, 0); + if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) + goto close_prog; + + bytes = read(efd, buf, sizeof(buf)); + close(efd); + if (bytes <= 0 || bytes >= sizeof(buf)) + goto close_prog; + + /* Open the perf event and attach bpf progrram */ + attr.config = strtol(buf, NULL, 0); + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN; + attr.sample_period = 1; + attr.wakeup_events = 1; + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", + pmu_fd, errno)) + goto close_prog; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); + if (err) + goto disable_pmu; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); + if (err) + goto disable_pmu; + + /* find map fds */ + control_map_fd = bpf_find_map(__func__, obj, "control_map"); + if (control_map_fd < 0) + goto disable_pmu; + + stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); + if (stackid_hmap_fd < 0) + goto disable_pmu; + + stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); + if (stackmap_fd < 0) + goto disable_pmu; + + /* give some time for bpf program run */ + sleep(1); + + /* disable stack trace collection */ + key = 0; + val = 1; + bpf_map_update_elem(control_map_fd, &key, &val, 0); + + /* for every element in stackid_hmap, we can find a corresponding one + * in stackmap, and vise versa. + */ + err = compare_map_keys(stackid_hmap_fd, stackmap_fd); + if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", + "err %d errno %d\n", err, errno)) + goto disable_pmu_noerr; + + err = compare_map_keys(stackmap_fd, stackid_hmap_fd); + if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", + "err %d errno %d\n", err, errno)) + goto disable_pmu_noerr; + + goto disable_pmu_noerr; +disable_pmu: + error_cnt++; +disable_pmu_noerr: + ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); + close(pmu_fd); +close_prog: + bpf_object__close(obj); +} + +static void test_stacktrace_map_raw_tp() +{ + int control_map_fd, stackid_hmap_fd, stackmap_fd; + const char *file = "./test_stacktrace_map.o"; + int efd, err, prog_fd; + __u32 key, val, duration = 0; + struct bpf_object *obj; + + err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) + return; + + efd = bpf_raw_tracepoint_open("sched_switch", prog_fd); + if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno)) + goto close_prog; + + /* find map fds */ + control_map_fd = bpf_find_map(__func__, obj, "control_map"); + if (control_map_fd < 0) + goto close_prog; + + stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); + if (stackid_hmap_fd < 0) + goto close_prog; + + stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); + if (stackmap_fd < 0) + goto close_prog; + + /* give some time for bpf program run */ + sleep(1); + + /* disable stack trace collection */ + key = 0; + val = 1; + bpf_map_update_elem(control_map_fd, &key, &val, 0); + + /* for every element in stackid_hmap, we can find a corresponding one + * in stackmap, and vise versa. + */ + err = compare_map_keys(stackid_hmap_fd, stackmap_fd); + if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", + "err %d errno %d\n", err, errno)) + goto close_prog; + + err = compare_map_keys(stackmap_fd, stackid_hmap_fd); + if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", + "err %d errno %d\n", err, errno)) + goto close_prog; + + goto close_prog_noerr; +close_prog: + error_cnt++; +close_prog_noerr: + bpf_object__close(obj); +} + +static int extract_build_id(char *build_id, size_t size) { - struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; + FILE *fp; + char *line = NULL; + size_t len = 0; + + fp = popen("readelf -n ./urandom_read | grep 'Build ID'", "r"); + if (fp == NULL) + return -1; + + if (getline(&line, &len, fp) == -1) + goto err; + fclose(fp); + + if (len > size) + len = size; + memcpy(build_id, line, len); + build_id[len] = '\0'; + return 0; +err: + fclose(fp); + return -1; +} + +static void test_stacktrace_build_id(void) +{ + int control_map_fd, stackid_hmap_fd, stackmap_fd; + const char *file = "./test_stacktrace_build_id.o"; + int bytes, efd, err, pmu_fd, prog_fd; + struct perf_event_attr attr = {}; + __u32 key, previous_key, val, duration = 0; + struct bpf_object *obj; + char buf[256]; + int i, j; + struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; + int build_id_matches = 0; + + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + goto out; - setrlimit(RLIMIT_MEMLOCK, &rinf); + /* Get the ID for the sched/sched_switch tracepoint */ + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/random/urandom_read/id"); + efd = open(buf, O_RDONLY, 0); + if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) + goto close_prog; + + bytes = read(efd, buf, sizeof(buf)); + close(efd); + if (CHECK(bytes <= 0 || bytes >= sizeof(buf), + "read", "bytes %d errno %d\n", bytes, errno)) + goto close_prog; + + /* Open the perf event and attach bpf progrram */ + attr.config = strtol(buf, NULL, 0); + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN; + attr.sample_period = 1; + attr.wakeup_events = 1; + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", + pmu_fd, errno)) + goto close_prog; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); + if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", + err, errno)) + goto close_pmu; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); + if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", + err, errno)) + goto disable_pmu; + + /* find map fds */ + control_map_fd = bpf_find_map(__func__, obj, "control_map"); + if (CHECK(control_map_fd < 0, "bpf_find_map control_map", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); + if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); + if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n", + err, errno)) + goto disable_pmu; + + assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") + == 0); + assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0); + /* disable stack trace collection */ + key = 0; + val = 1; + bpf_map_update_elem(control_map_fd, &key, &val, 0); + + /* for every element in stackid_hmap, we can find a corresponding one + * in stackmap, and vise versa. + */ + err = compare_map_keys(stackid_hmap_fd, stackmap_fd); + if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + err = compare_map_keys(stackmap_fd, stackid_hmap_fd); + if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + err = extract_build_id(buf, 256); + + if (CHECK(err, "get build_id with readelf", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + + err = bpf_map_get_next_key(stackmap_fd, NULL, &key); + if (CHECK(err, "get_next_key from stackmap", + "err %d, errno %d\n", err, errno)) + goto disable_pmu; + + do { + char build_id[64]; + + err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs); + if (CHECK(err, "lookup_elem from stackmap", + "err %d, errno %d\n", err, errno)) + goto disable_pmu; + for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i) + if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID && + id_offs[i].offset != 0) { + for (j = 0; j < 20; ++j) + sprintf(build_id + 2 * j, "%02x", + id_offs[i].build_id[j] & 0xff); + if (strstr(buf, build_id) != NULL) + build_id_matches = 1; + } + previous_key = key; + } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); + + CHECK(build_id_matches < 1, "build id match", + "Didn't find expected build ID from the map"); + +disable_pmu: + ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); + +close_pmu: + close(pmu_fd); + +close_prog: + bpf_object__close(obj); +out: + return; +} + +int main(void) +{ test_pkt_access(); test_xdp(); - test_l4lb(); + test_l4lb_all(); + test_xdp_noinline(); test_tcp_estats(); test_bpf_obj_id(); test_pkt_md_access(); test_obj_name(); + test_tp_attach_query(); + test_stacktrace_map(); + test_stacktrace_build_id(); + test_stacktrace_map_raw_tp(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c new file mode 100644 index 000000000000..73bb20cfb9b7 --- /dev/null +++ b/tools/testing/selftests/bpf/test_sock.c @@ -0,0 +1,479 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <stdio.h> +#include <unistd.h> + +#include <arpa/inet.h> +#include <sys/types.h> +#include <sys/socket.h> + +#include <linux/filter.h> + +#include <bpf/bpf.h> + +#include "cgroup_helpers.h" + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +#define CG_PATH "/foo" +#define MAX_INSNS 512 + +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +struct sock_test { + const char *descr; + /* BPF prog properties */ + struct bpf_insn insns[MAX_INSNS]; + enum bpf_attach_type expected_attach_type; + enum bpf_attach_type attach_type; + /* Socket properties */ + int domain; + int type; + /* Endpoint to bind() to */ + const char *ip; + unsigned short port; + /* Expected test result */ + enum { + LOAD_REJECT, + ATTACH_REJECT, + BIND_REJECT, + SUCCESS, + } result; +}; + +static struct sock_test tests[] = { + { + "bind4 load with invalid access: src_ip6", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_ip6[0])), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET4_POST_BIND, + 0, + 0, + NULL, + 0, + LOAD_REJECT, + }, + { + "bind4 load with invalid access: mark", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, mark)), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET4_POST_BIND, + 0, + 0, + NULL, + 0, + LOAD_REJECT, + }, + { + "bind6 load with invalid access: src_ip4", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_ip4)), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_INET6_POST_BIND, + 0, + 0, + NULL, + 0, + LOAD_REJECT, + }, + { + "sock_create load with invalid access: src_port", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_port)), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + BPF_CGROUP_INET_SOCK_CREATE, + BPF_CGROUP_INET_SOCK_CREATE, + 0, + 0, + NULL, + 0, + LOAD_REJECT, + }, + { + "sock_create load w/o expected_attach_type (compat mode)", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + 0, + BPF_CGROUP_INET_SOCK_CREATE, + AF_INET, + SOCK_STREAM, + "127.0.0.1", + 8097, + SUCCESS, + }, + { + "sock_create load w/ expected_attach_type", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + BPF_CGROUP_INET_SOCK_CREATE, + BPF_CGROUP_INET_SOCK_CREATE, + AF_INET, + SOCK_STREAM, + "127.0.0.1", + 8097, + SUCCESS, + }, + { + "attach type mismatch bind4 vs bind6", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET6_POST_BIND, + 0, + 0, + NULL, + 0, + ATTACH_REJECT, + }, + { + "attach type mismatch bind6 vs bind4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_INET4_POST_BIND, + 0, + 0, + NULL, + 0, + ATTACH_REJECT, + }, + { + "attach type mismatch default vs bind4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + 0, + BPF_CGROUP_INET4_POST_BIND, + 0, + 0, + NULL, + 0, + ATTACH_REJECT, + }, + { + "attach type mismatch bind6 vs sock_create", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_INET_SOCK_CREATE, + 0, + 0, + NULL, + 0, + ATTACH_REJECT, + }, + { + "bind4 reject all", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET4_POST_BIND, + AF_INET, + SOCK_STREAM, + "0.0.0.0", + 0, + BIND_REJECT, + }, + { + "bind6 reject all", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_INET6_POST_BIND, + AF_INET6, + SOCK_STREAM, + "::", + 0, + BIND_REJECT, + }, + { + "bind6 deny specific IP & port", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + + /* if (ip == expected && port == expected) */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_ip6[3])), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x01000000, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_port)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_INET6_POST_BIND, + AF_INET6, + SOCK_STREAM, + "::1", + 8193, + BIND_REJECT, + }, + { + "bind4 allow specific IP & port", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + + /* if (ip == expected && port == expected) */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_ip4)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x0100007F, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_port)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2), + + /* return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + + /* else return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET4_POST_BIND, + AF_INET, + SOCK_STREAM, + "127.0.0.1", + 4098, + SUCCESS, + }, + { + "bind4 allow all", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET4_POST_BIND, + AF_INET, + SOCK_STREAM, + "0.0.0.0", + 0, + SUCCESS, + }, + { + "bind6 allow all", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_INET6_POST_BIND, + AF_INET6, + SOCK_STREAM, + "::", + 0, + SUCCESS, + }, +}; + +static size_t probe_prog_length(const struct bpf_insn *fp) +{ + size_t len; + + for (len = MAX_INSNS - 1; len > 0; --len) + if (fp[len].code != 0 || fp[len].imm != 0) + break; + return len + 1; +} + +static int load_sock_prog(const struct bpf_insn *prog, + enum bpf_attach_type attach_type) +{ + struct bpf_load_program_attr attr; + + memset(&attr, 0, sizeof(struct bpf_load_program_attr)); + attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK; + attr.expected_attach_type = attach_type; + attr.insns = prog; + attr.insns_cnt = probe_prog_length(attr.insns); + attr.license = "GPL"; + + return bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE); +} + +static int attach_sock_prog(int cgfd, int progfd, + enum bpf_attach_type attach_type) +{ + return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE); +} + +static int bind_sock(int domain, int type, const char *ip, unsigned short port) +{ + struct sockaddr_storage addr; + struct sockaddr_in6 *addr6; + struct sockaddr_in *addr4; + int sockfd = -1; + socklen_t len; + int err = 0; + + sockfd = socket(domain, type, 0); + if (sockfd < 0) + goto err; + + memset(&addr, 0, sizeof(addr)); + + if (domain == AF_INET) { + len = sizeof(struct sockaddr_in); + addr4 = (struct sockaddr_in *)&addr; + addr4->sin_family = domain; + addr4->sin_port = htons(port); + if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1) + goto err; + } else if (domain == AF_INET6) { + len = sizeof(struct sockaddr_in6); + addr6 = (struct sockaddr_in6 *)&addr; + addr6->sin6_family = domain; + addr6->sin6_port = htons(port); + if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1) + goto err; + } else { + goto err; + } + + if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) + goto err; + + goto out; +err: + err = -1; +out: + close(sockfd); + return err; +} + +static int run_test_case(int cgfd, const struct sock_test *test) +{ + int progfd = -1; + int err = 0; + + printf("Test case: %s .. ", test->descr); + progfd = load_sock_prog(test->insns, test->expected_attach_type); + if (progfd < 0) { + if (test->result == LOAD_REJECT) + goto out; + else + goto err; + } + + if (attach_sock_prog(cgfd, progfd, test->attach_type) == -1) { + if (test->result == ATTACH_REJECT) + goto out; + else + goto err; + } + + if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) { + /* sys_bind() may fail for different reasons, errno has to be + * checked to confirm that BPF program rejected it. + */ + if (test->result == BIND_REJECT && errno == EPERM) + goto out; + else + goto err; + } + + + if (test->result != SUCCESS) + goto err; + + goto out; +err: + err = -1; +out: + /* Detaching w/o checking return code: best effort attempt. */ + if (progfd != -1) + bpf_prog_detach(cgfd, test->attach_type); + close(progfd); + printf("[%s]\n", err ? "FAIL" : "PASS"); + return err; +} + +static int run_tests(int cgfd) +{ + int passes = 0; + int fails = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(tests); ++i) { + if (run_test_case(cgfd, &tests[i])) + ++fails; + else + ++passes; + } + printf("Summary: %d PASSED, %d FAILED\n", passes, fails); + return fails ? -1 : 0; +} + +int main(int argc, char **argv) +{ + int cgfd = -1; + int err = 0; + + if (setup_cgroup_environment()) + goto err; + + cgfd = create_and_get_cgroup(CG_PATH); + if (!cgfd) + goto err; + + if (join_cgroup(CG_PATH)) + goto err; + + if (run_tests(cgfd)) + goto err; + + goto out; +err: + err = -1; +out: + close(cgfd); + cleanup_cgroup_environment(); + return err; +} diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c new file mode 100644 index 000000000000..d488f20926e8 --- /dev/null +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -0,0 +1,588 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <arpa/inet.h> +#include <sys/types.h> +#include <sys/socket.h> + +#include <linux/filter.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "cgroup_helpers.h" + +#define CG_PATH "/foo" +#define CONNECT4_PROG_PATH "./connect4_prog.o" +#define CONNECT6_PROG_PATH "./connect6_prog.o" + +#define SERV4_IP "192.168.1.254" +#define SERV4_REWRITE_IP "127.0.0.1" +#define SERV4_PORT 4040 +#define SERV4_REWRITE_PORT 4444 + +#define SERV6_IP "face:b00c:1234:5678::abcd" +#define SERV6_REWRITE_IP "::1" +#define SERV6_PORT 6060 +#define SERV6_REWRITE_PORT 6666 + +#define INET_NTOP_BUF 40 + +typedef int (*load_fn)(enum bpf_attach_type, const char *comment); +typedef int (*info_fn)(int, struct sockaddr *, socklen_t *); + +struct program { + enum bpf_attach_type type; + load_fn loadfn; + int fd; + const char *name; + enum bpf_attach_type invalid_type; +}; + +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +static int mk_sockaddr(int domain, const char *ip, unsigned short port, + struct sockaddr *addr, socklen_t addr_len) +{ + struct sockaddr_in6 *addr6; + struct sockaddr_in *addr4; + + if (domain != AF_INET && domain != AF_INET6) { + log_err("Unsupported address family"); + return -1; + } + + memset(addr, 0, addr_len); + + if (domain == AF_INET) { + if (addr_len < sizeof(struct sockaddr_in)) + return -1; + addr4 = (struct sockaddr_in *)addr; + addr4->sin_family = domain; + addr4->sin_port = htons(port); + if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1) { + log_err("Invalid IPv4: %s", ip); + return -1; + } + } else if (domain == AF_INET6) { + if (addr_len < sizeof(struct sockaddr_in6)) + return -1; + addr6 = (struct sockaddr_in6 *)addr; + addr6->sin6_family = domain; + addr6->sin6_port = htons(port); + if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1) { + log_err("Invalid IPv6: %s", ip); + return -1; + } + } + + return 0; +} + +static int load_insns(enum bpf_attach_type attach_type, + const struct bpf_insn *insns, size_t insns_cnt, + const char *comment) +{ + struct bpf_load_program_attr load_attr; + int ret; + + memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); + load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; + load_attr.expected_attach_type = attach_type; + load_attr.insns = insns; + load_attr.insns_cnt = insns_cnt; + load_attr.license = "GPL"; + + ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE); + if (ret < 0 && comment) { + log_err(">>> Loading %s program error.\n" + ">>> Output from verifier:\n%s\n-------\n", + comment, bpf_log_buf); + } + + return ret; +} + +/* [1] These testing programs try to read different context fields, including + * narrow loads of different sizes from user_ip4 and user_ip6, and write to + * those allowed to be overridden. + * + * [2] BPF_LD_IMM64 & BPF_JMP_REG are used below whenever there is a need to + * compare a register with unsigned 32bit integer. BPF_JMP_IMM can't be used + * in such cases since it accepts only _signed_ 32bit integer as IMM + * argument. Also note that BPF_LD_IMM64 contains 2 instructions what matters + * to count jumps properly. + */ + +static int bind4_prog_load(enum bpf_attach_type attach_type, + const char *comment) +{ + union { + uint8_t u4_addr8[4]; + uint16_t u4_addr16[2]; + uint32_t u4_addr32; + } ip4; + struct sockaddr_in addr4_rw; + + if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) { + log_err("Invalid IPv4: %s", SERV4_IP); + return -1; + } + + if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT, + (struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1) + return -1; + + /* See [1]. */ + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + + /* if (sk.family == AF_INET && */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, family)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 16), + + /* (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, type)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1), + BPF_JMP_A(1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 12), + + /* 1st_byte_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 10), + + /* 1st_half_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 8), + + /* whole_user_ip4 == expected) { */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4)), + BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */ + BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4), + + /* user_ip4 = addr4_rw.sin_addr */ + BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_addr.s_addr), + BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, + offsetof(struct bpf_sock_addr, user_ip4)), + + /* user_port = addr4_rw.sin_port */ + BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_port), + BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, + offsetof(struct bpf_sock_addr, user_port)), + /* } */ + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }; + + return load_insns(attach_type, insns, + sizeof(insns) / sizeof(struct bpf_insn), comment); +} + +static int bind6_prog_load(enum bpf_attach_type attach_type, + const char *comment) +{ + struct sockaddr_in6 addr6_rw; + struct in6_addr ip6; + + if (inet_pton(AF_INET6, SERV6_IP, (void *)&ip6) != 1) { + log_err("Invalid IPv6: %s", SERV6_IP); + return -1; + } + + if (mk_sockaddr(AF_INET6, SERV6_REWRITE_IP, SERV6_REWRITE_PORT, + (struct sockaddr *)&addr6_rw, sizeof(addr6_rw)) == -1) + return -1; + + /* See [1]. */ + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + + /* if (sk.family == AF_INET6 && */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, family)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18), + + /* 5th_byte_of_user_ip6 == expected && */ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip6[1])), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr[4], 16), + + /* 3rd_half_of_user_ip6 == expected && */ + BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip6[1])), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr16[2], 14), + + /* last_word_of_user_ip6 == expected) { */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip6[3])), + BPF_LD_IMM64(BPF_REG_8, ip6.s6_addr32[3]), /* See [2]. */ + BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 10), + + +#define STORE_IPV6_WORD(N) \ + BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_addr.s6_addr32[N]), \ + BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, \ + offsetof(struct bpf_sock_addr, user_ip6[N])) + + /* user_ip6 = addr6_rw.sin6_addr */ + STORE_IPV6_WORD(0), + STORE_IPV6_WORD(1), + STORE_IPV6_WORD(2), + STORE_IPV6_WORD(3), + + /* user_port = addr6_rw.sin6_port */ + BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_port), + BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, + offsetof(struct bpf_sock_addr, user_port)), + + /* } */ + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }; + + return load_insns(attach_type, insns, + sizeof(insns) / sizeof(struct bpf_insn), comment); +} + +static int connect_prog_load_path(const char *path, + enum bpf_attach_type attach_type, + const char *comment) +{ + struct bpf_prog_load_attr attr; + struct bpf_object *obj; + int prog_fd; + + memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); + attr.file = path; + attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; + attr.expected_attach_type = attach_type; + + if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) { + if (comment) + log_err(">>> Loading %s program at %s error.\n", + comment, path); + return -1; + } + + return prog_fd; +} + +static int connect4_prog_load(enum bpf_attach_type attach_type, + const char *comment) +{ + return connect_prog_load_path(CONNECT4_PROG_PATH, attach_type, comment); +} + +static int connect6_prog_load(enum bpf_attach_type attach_type, + const char *comment) +{ + return connect_prog_load_path(CONNECT6_PROG_PATH, attach_type, comment); +} + +static void print_ip_port(int sockfd, info_fn fn, const char *fmt) +{ + char addr_buf[INET_NTOP_BUF]; + struct sockaddr_storage addr; + struct sockaddr_in6 *addr6; + struct sockaddr_in *addr4; + socklen_t addr_len; + unsigned short port; + void *nip; + + addr_len = sizeof(struct sockaddr_storage); + memset(&addr, 0, addr_len); + + if (fn(sockfd, (struct sockaddr *)&addr, (socklen_t *)&addr_len) == 0) { + if (addr.ss_family == AF_INET) { + addr4 = (struct sockaddr_in *)&addr; + nip = (void *)&addr4->sin_addr; + port = ntohs(addr4->sin_port); + } else if (addr.ss_family == AF_INET6) { + addr6 = (struct sockaddr_in6 *)&addr; + nip = (void *)&addr6->sin6_addr; + port = ntohs(addr6->sin6_port); + } else { + return; + } + const char *addr_str = + inet_ntop(addr.ss_family, nip, addr_buf, INET_NTOP_BUF); + printf(fmt, addr_str ? addr_str : "??", port); + } +} + +static void print_local_ip_port(int sockfd, const char *fmt) +{ + print_ip_port(sockfd, getsockname, fmt); +} + +static void print_remote_ip_port(int sockfd, const char *fmt) +{ + print_ip_port(sockfd, getpeername, fmt); +} + +static int start_server(int type, const struct sockaddr_storage *addr, + socklen_t addr_len) +{ + + int fd; + + fd = socket(addr->ss_family, type, 0); + if (fd == -1) { + log_err("Failed to create server socket"); + goto out; + } + + if (bind(fd, (const struct sockaddr *)addr, addr_len) == -1) { + log_err("Failed to bind server socket"); + goto close_out; + } + + if (type == SOCK_STREAM) { + if (listen(fd, 128) == -1) { + log_err("Failed to listen on server socket"); + goto close_out; + } + } + + print_local_ip_port(fd, "\t Actual: bind(%s, %d)\n"); + + goto out; +close_out: + close(fd); + fd = -1; +out: + return fd; +} + +static int connect_to_server(int type, const struct sockaddr_storage *addr, + socklen_t addr_len) +{ + int domain; + int fd; + + domain = addr->ss_family; + + if (domain != AF_INET && domain != AF_INET6) { + log_err("Unsupported address family"); + return -1; + } + + fd = socket(domain, type, 0); + if (fd == -1) { + log_err("Failed to creating client socket"); + return -1; + } + + if (connect(fd, (const struct sockaddr *)addr, addr_len) == -1) { + log_err("Fail to connect to server"); + goto err; + } + + print_remote_ip_port(fd, "\t Actual: connect(%s, %d)"); + print_local_ip_port(fd, " from (%s, %d)\n"); + + return 0; +err: + close(fd); + return -1; +} + +static void print_test_case_num(int domain, int type) +{ + static int test_num; + + printf("Test case #%d (%s/%s):\n", ++test_num, + (domain == AF_INET ? "IPv4" : + domain == AF_INET6 ? "IPv6" : + "unknown_domain"), + (type == SOCK_STREAM ? "TCP" : + type == SOCK_DGRAM ? "UDP" : + "unknown_type")); +} + +static int run_test_case(int domain, int type, const char *ip, + unsigned short port) +{ + struct sockaddr_storage addr; + socklen_t addr_len = sizeof(addr); + int servfd = -1; + int err = 0; + + print_test_case_num(domain, type); + + if (mk_sockaddr(domain, ip, port, (struct sockaddr *)&addr, + addr_len) == -1) + return -1; + + printf("\tRequested: bind(%s, %d) ..\n", ip, port); + servfd = start_server(type, &addr, addr_len); + if (servfd == -1) + goto err; + + printf("\tRequested: connect(%s, %d) from (*, *) ..\n", ip, port); + if (connect_to_server(type, &addr, addr_len)) + goto err; + + goto out; +err: + err = -1; +out: + close(servfd); + return err; +} + +static void close_progs_fds(struct program *progs, size_t prog_cnt) +{ + size_t i; + + for (i = 0; i < prog_cnt; ++i) { + close(progs[i].fd); + progs[i].fd = -1; + } +} + +static int load_and_attach_progs(int cgfd, struct program *progs, + size_t prog_cnt) +{ + size_t i; + + for (i = 0; i < prog_cnt; ++i) { + printf("Load %s with invalid type (can pollute stderr) ", + progs[i].name); + fflush(stdout); + progs[i].fd = progs[i].loadfn(progs[i].invalid_type, NULL); + if (progs[i].fd != -1) { + log_err("Load with invalid type accepted for %s", + progs[i].name); + goto err; + } + printf("... REJECTED\n"); + + printf("Load %s with valid type", progs[i].name); + progs[i].fd = progs[i].loadfn(progs[i].type, progs[i].name); + if (progs[i].fd == -1) { + log_err("Failed to load program %s", progs[i].name); + goto err; + } + printf(" ... OK\n"); + + printf("Attach %s with invalid type", progs[i].name); + if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].invalid_type, + BPF_F_ALLOW_OVERRIDE) != -1) { + log_err("Attach with invalid type accepted for %s", + progs[i].name); + goto err; + } + printf(" ... REJECTED\n"); + + printf("Attach %s with valid type", progs[i].name); + if (bpf_prog_attach(progs[i].fd, cgfd, progs[i].type, + BPF_F_ALLOW_OVERRIDE) == -1) { + log_err("Failed to attach program %s", progs[i].name); + goto err; + } + printf(" ... OK\n"); + } + + return 0; +err: + close_progs_fds(progs, prog_cnt); + return -1; +} + +static int run_domain_test(int domain, int cgfd, struct program *progs, + size_t prog_cnt, const char *ip, unsigned short port) +{ + int err = 0; + + if (load_and_attach_progs(cgfd, progs, prog_cnt) == -1) + goto err; + + if (run_test_case(domain, SOCK_STREAM, ip, port) == -1) + goto err; + + if (run_test_case(domain, SOCK_DGRAM, ip, port) == -1) + goto err; + + goto out; +err: + err = -1; +out: + close_progs_fds(progs, prog_cnt); + return err; +} + +static int run_test(void) +{ + size_t inet6_prog_cnt; + size_t inet_prog_cnt; + int cgfd = -1; + int err = 0; + + struct program inet6_progs[] = { + {BPF_CGROUP_INET6_BIND, bind6_prog_load, -1, "bind6", + BPF_CGROUP_INET4_BIND}, + {BPF_CGROUP_INET6_CONNECT, connect6_prog_load, -1, "connect6", + BPF_CGROUP_INET4_CONNECT}, + }; + inet6_prog_cnt = sizeof(inet6_progs) / sizeof(struct program); + + struct program inet_progs[] = { + {BPF_CGROUP_INET4_BIND, bind4_prog_load, -1, "bind4", + BPF_CGROUP_INET6_BIND}, + {BPF_CGROUP_INET4_CONNECT, connect4_prog_load, -1, "connect4", + BPF_CGROUP_INET6_CONNECT}, + }; + inet_prog_cnt = sizeof(inet_progs) / sizeof(struct program); + + if (setup_cgroup_environment()) + goto err; + + cgfd = create_and_get_cgroup(CG_PATH); + if (!cgfd) + goto err; + + if (join_cgroup(CG_PATH)) + goto err; + + if (run_domain_test(AF_INET, cgfd, inet_progs, inet_prog_cnt, SERV4_IP, + SERV4_PORT) == -1) + goto err; + + if (run_domain_test(AF_INET6, cgfd, inet6_progs, inet6_prog_cnt, + SERV6_IP, SERV6_PORT) == -1) + goto err; + + goto out; +err: + err = -1; +out: + close(cgfd); + cleanup_cgroup_environment(); + printf(err ? "### FAIL\n" : "### SUCCESS\n"); + return err; +} + +int main(int argc, char **argv) +{ + if (argc < 2) { + fprintf(stderr, + "%s has to be run via %s.sh. Skip direct run.\n", + argv[0], argv[0]); + exit(0); + } + return run_test(); +} diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh new file mode 100755 index 000000000000..c6e1dcf992c4 --- /dev/null +++ b/tools/testing/selftests/bpf/test_sock_addr.sh @@ -0,0 +1,57 @@ +#!/bin/sh + +set -eu + +ping_once() +{ + ping -q -c 1 -W 1 ${1%%/*} >/dev/null 2>&1 +} + +wait_for_ip() +{ + local _i + echo -n "Wait for testing IPv4/IPv6 to become available " + for _i in $(seq ${MAX_PING_TRIES}); do + echo -n "." + if ping_once ${TEST_IPv4} && ping_once ${TEST_IPv6}; then + echo " OK" + return + fi + done + echo 1>&2 "ERROR: Timeout waiting for test IP to become available." + exit 1 +} + +setup() +{ + # Create testing interfaces not to interfere with current environment. + ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER} + ip link set ${TEST_IF} up + ip link set ${TEST_IF_PEER} up + + ip -4 addr add ${TEST_IPv4} dev ${TEST_IF} + ip -6 addr add ${TEST_IPv6} dev ${TEST_IF} + wait_for_ip +} + +cleanup() +{ + ip link del ${TEST_IF} 2>/dev/null || : + ip link del ${TEST_IF_PEER} 2>/dev/null || : +} + +main() +{ + trap cleanup EXIT 2 3 6 15 + setup + ./test_sock_addr setup_done +} + +BASENAME=$(basename $0 .sh) +TEST_IF="${BASENAME}1" +TEST_IF_PEER="${BASENAME}2" +TEST_IPv4="127.0.0.4/8" +TEST_IPv6="::6/128" +MAX_PING_TRIES=5 + +main diff --git a/tools/testing/selftests/bpf/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/test_stacktrace_build_id.c new file mode 100644 index 000000000000..b755bd783ce5 --- /dev/null +++ b/tools/testing/selftests/bpf/test_stacktrace_build_id.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <linux/bpf.h> +#include "bpf_helpers.h" + +#ifndef PERF_MAX_STACK_DEPTH +#define PERF_MAX_STACK_DEPTH 127 +#endif + +struct bpf_map_def SEC("maps") control_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") stackid_hmap = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 10000, +}; + +struct bpf_map_def SEC("maps") stackmap = { + .type = BPF_MAP_TYPE_STACK_TRACE, + .key_size = sizeof(__u32), + .value_size = sizeof(struct bpf_stack_build_id) + * PERF_MAX_STACK_DEPTH, + .max_entries = 128, + .map_flags = BPF_F_STACK_BUILD_ID, +}; + +/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */ +struct random_urandom_args { + unsigned long long pad; + int got_bits; + int pool_left; + int input_left; +}; + +SEC("tracepoint/random/urandom_read") +int oncpu(struct random_urandom_args *args) +{ + __u32 key = 0, val = 0, *value_p; + + value_p = bpf_map_lookup_elem(&control_map, &key); + if (value_p && *value_p) + return 0; /* skip if non-zero *value_p */ + + /* The size of stackmap and stackid_hmap should be the same */ + key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK); + if ((int)key >= 0) + bpf_map_update_elem(&stackid_hmap, &key, &val, 0); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/test_stacktrace_map.c new file mode 100644 index 000000000000..76d85c5d08bd --- /dev/null +++ b/tools/testing/selftests/bpf/test_stacktrace_map.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <linux/bpf.h> +#include "bpf_helpers.h" + +#ifndef PERF_MAX_STACK_DEPTH +#define PERF_MAX_STACK_DEPTH 127 +#endif + +struct bpf_map_def SEC("maps") control_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") stackid_hmap = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 10000, +}; + +struct bpf_map_def SEC("maps") stackmap = { + .type = BPF_MAP_TYPE_STACK_TRACE, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH, + .max_entries = 10000, +}; + +/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ +struct sched_switch_args { + unsigned long long pad; + char prev_comm[16]; + int prev_pid; + int prev_prio; + long long prev_state; + char next_comm[16]; + int next_pid; + int next_prio; +}; + +SEC("tracepoint/sched/sched_switch") +int oncpu(struct sched_switch_args *ctx) +{ + __u32 key = 0, val = 0, *value_p; + + value_p = bpf_map_lookup_elem(&control_map, &key); + if (value_p && *value_p) + return 0; /* skip if non-zero *value_p */ + + /* The size of stackmap and stackid_hmap should be the same */ + key = bpf_get_stackid(ctx, &stackmap, 0); + if ((int)key >= 0) + bpf_map_update_elem(&stackid_hmap, &key, &val, 0); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c index 8b201895c569..6272c784ca2a 100644 --- a/tools/testing/selftests/bpf/test_tag.c +++ b/tools/testing/selftests/bpf/test_tag.c @@ -12,7 +12,6 @@ #include <assert.h> #include <sys/socket.h> -#include <sys/resource.h> #include <linux/filter.h> #include <linux/bpf.h> @@ -21,6 +20,7 @@ #include <bpf/bpf.h> #include "../../../include/linux/filter.h" +#include "bpf_rlimit.h" static struct bpf_insn prog[BPF_MAXINSNS]; @@ -184,11 +184,9 @@ static void do_test(uint32_t *tests, int start_insns, int fd_map, int main(void) { - struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; uint32_t tests = 0; int i, fd_map; - setrlimit(RLIMIT_MEMLOCK, &rinf); fd_map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int), sizeof(int), 1, BPF_F_NO_PREALLOC); assert(fd_map > 0); diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h new file mode 100644 index 000000000000..2fe43289943c --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpbpf.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef _TEST_TCPBPF_H +#define _TEST_TCPBPF_H + +struct tcpbpf_globals { + __u32 event_map; + __u32 total_retrans; + __u32 data_segs_in; + __u32 data_segs_out; + __u32 bad_cb_test_rv; + __u32 good_cb_test_rv; + __u64 bytes_received; + __u64 bytes_acked; +}; +#endif diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c new file mode 100644 index 000000000000..3e645ee41ed5 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/tcp.h> +#include <netinet/in.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" +#include "test_tcpbpf.h" + +struct bpf_map_def SEC("maps") global_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct tcpbpf_globals), + .max_entries = 2, +}; + +static inline void update_event_map(int event) +{ + __u32 key = 0; + struct tcpbpf_globals g, *gp; + + gp = bpf_map_lookup_elem(&global_map, &key); + if (gp == NULL) { + struct tcpbpf_globals g = {0}; + + g.event_map |= (1 << event); + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } else { + g = *gp; + g.event_map |= (1 << event); + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } +} + +int _version SEC("version") = 1; + +SEC("sockops") +int bpf_testcb(struct bpf_sock_ops *skops) +{ + int rv = -1; + int bad_call_rv = 0; + int good_call_rv = 0; + int op; + int v = 0; + + op = (int) skops->op; + + update_event_map(op); + + switch (op) { + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + /* Test failure to set largest cb flag (assumes not defined) */ + bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80); + /* Set callback */ + good_call_rv = bpf_sock_ops_cb_flags_set(skops, + BPF_SOCK_OPS_STATE_CB_FLAG); + /* Update results */ + { + __u32 key = 0; + struct tcpbpf_globals g, *gp; + + gp = bpf_map_lookup_elem(&global_map, &key); + if (!gp) + break; + g = *gp; + g.bad_cb_test_rv = bad_call_rv; + g.good_cb_test_rv = good_call_rv; + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + skops->sk_txhash = 0x12345f; + v = 0xff; + rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v, + sizeof(v)); + break; + case BPF_SOCK_OPS_RTO_CB: + break; + case BPF_SOCK_OPS_RETRANS_CB: + break; + case BPF_SOCK_OPS_STATE_CB: + if (skops->args[1] == BPF_TCP_CLOSE) { + __u32 key = 0; + struct tcpbpf_globals g, *gp; + + gp = bpf_map_lookup_elem(&global_map, &key); + if (!gp) + break; + g = *gp; + g.total_retrans = skops->total_retrans; + g.data_segs_in = skops->data_segs_in; + g.data_segs_out = skops->data_segs_out; + g.bytes_received = skops->bytes_received; + g.bytes_acked = skops->bytes_acked; + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } + break; + default: + rv = -1; + } + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c new file mode 100644 index 000000000000..84ab5163c828 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <signal.h> +#include <string.h> +#include <assert.h> +#include <linux/perf_event.h> +#include <linux/ptrace.h> +#include <linux/bpf.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include "bpf_util.h" +#include "bpf_rlimit.h" +#include <linux/perf_event.h> +#include "test_tcpbpf.h" + +static int bpf_find_map(const char *test, struct bpf_object *obj, + const char *name) +{ + struct bpf_map *map; + + map = bpf_object__find_map_by_name(obj, name); + if (!map) { + printf("%s:FAIL:map '%s' not found\n", test, name); + return -1; + } + return bpf_map__fd(map); +} + +#define SYSTEM(CMD) \ + do { \ + if (system(CMD)) { \ + printf("system(%s) FAILS!\n", CMD); \ + } \ + } while (0) + +int main(int argc, char **argv) +{ + const char *file = "test_tcpbpf_kern.o"; + struct tcpbpf_globals g = {0}; + int cg_fd, prog_fd, map_fd; + bool debug_flag = false; + int error = EXIT_FAILURE; + struct bpf_object *obj; + char cmd[100], *dir; + struct stat buffer; + __u32 key = 0; + int pid; + int rv; + + if (argc > 1 && strcmp(argv[1], "-d") == 0) + debug_flag = true; + + dir = "/tmp/cgroupv2/foo"; + + if (stat(dir, &buffer) != 0) { + SYSTEM("mkdir -p /tmp/cgroupv2"); + SYSTEM("mount -t cgroup2 none /tmp/cgroupv2"); + SYSTEM("mkdir -p /tmp/cgroupv2/foo"); + } + pid = (int) getpid(); + sprintf(cmd, "echo %d >> /tmp/cgroupv2/foo/cgroup.procs", pid); + SYSTEM(cmd); + + cg_fd = open(dir, O_DIRECTORY, O_RDONLY); + if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { + printf("FAILED: load_bpf_file failed for: %s\n", file); + goto err; + } + + rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0); + if (rv) { + printf("FAILED: bpf_prog_attach: %d (%s)\n", + error, strerror(errno)); + goto err; + } + + SYSTEM("./tcp_server.py"); + + map_fd = bpf_find_map(__func__, obj, "global_map"); + if (map_fd < 0) + goto err; + + rv = bpf_map_lookup_elem(map_fd, &key, &g); + if (rv != 0) { + printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); + goto err; + } + + if (g.bytes_received != 501 || g.bytes_acked != 1002 || + g.data_segs_in != 1 || g.data_segs_out != 1 || + (g.event_map ^ 0x47e) != 0 || g.bad_cb_test_rv != 0x80 || + g.good_cb_test_rv != 0) { + printf("FAILED: Wrong stats\n"); + if (debug_flag) { + printf("\n"); + printf("bytes_received: %d (expecting 501)\n", + (int)g.bytes_received); + printf("bytes_acked: %d (expecting 1002)\n", + (int)g.bytes_acked); + printf("data_segs_in: %d (expecting 1)\n", + g.data_segs_in); + printf("data_segs_out: %d (expecting 1)\n", + g.data_segs_out); + printf("event_map: 0x%x (at least 0x47e)\n", + g.event_map); + printf("bad_cb_test_rv: 0x%x (expecting 0x80)\n", + g.bad_cb_test_rv); + printf("good_cb_test_rv:0x%x (expecting 0)\n", + g.good_cb_test_rv); + } + goto err; + } + printf("PASSED!\n"); + error = 0; +err: + bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); + return error; + +} diff --git a/tools/testing/selftests/bpf/test_tracepoint.c b/tools/testing/selftests/bpf/test_tracepoint.c new file mode 100644 index 000000000000..04bf084517e0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tracepoint.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook + +#include <linux/bpf.h> +#include "bpf_helpers.h" + +/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ +struct sched_switch_args { + unsigned long long pad; + char prev_comm[16]; + int prev_pid; + int prev_prio; + long long prev_state; + char next_comm[16]; + int next_pid; + int next_prio; +}; + +SEC("tracepoint/sched/sched_switch") +int oncpu(struct sched_switch_args *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 5ed4175c4ff8..3e7718b1a9ae 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2,6 +2,7 @@ * Testsuite for eBPF verifier * * Copyright (c) 2014 PLUMgrid, http://plumgrid.com + * Copyright (c) 2017 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -20,14 +21,15 @@ #include <stddef.h> #include <stdbool.h> #include <sched.h> +#include <limits.h> #include <sys/capability.h> -#include <sys/resource.h> #include <linux/unistd.h> #include <linux/filter.h> #include <linux/bpf_perf_event.h> #include <linux/bpf.h> +#include <linux/if_ether.h> #include <bpf/bpf.h> @@ -38,7 +40,7 @@ # define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1 # endif #endif - +#include "bpf_rlimit.h" #include "../../../include/linux/filter.h" #ifndef ARRAY_SIZE @@ -48,10 +50,15 @@ #define MAX_INSNS 512 #define MAX_FIXUPS 8 #define MAX_NR_MAPS 4 +#define POINTER_VALUE 0xcafe4all +#define TEST_DATA_LEN 64 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) +#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled" +static bool unpriv_disabled = false; + struct bpf_test { const char *descr; struct bpf_insn insns[MAX_INSNS]; @@ -61,6 +68,7 @@ struct bpf_test { int fixup_map_in_map[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; + uint32_t retval; enum { UNDEF, ACCEPT, @@ -94,6 +102,326 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = -3, + }, + { + "DIV32 by 0, zero check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "DIV32 by 0, zero check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "DIV64 by 0, zero check", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "MOD32 by 0, zero check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "MOD32 by 0, zero check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "MOD64 by 0, zero check", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "DIV32 by 0, zero check ok, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 2), + BPF_MOV32_IMM(BPF_REG_2, 16), + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 8, + }, + { + "DIV32 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV32 by 0, zero check 2, cls", + .insns = { + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV64 by 0, zero check, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "MOD32 by 0, zero check ok, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_MOV32_IMM(BPF_REG_1, 3), + BPF_MOV32_IMM(BPF_REG_2, 5), + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, + }, + { + "MOD32 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "MOD32 by 0, zero check 2, cls", + .insns = { + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "MOD64 by 0, zero check 1, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, + }, + { + "MOD64 by 0, zero check 2, cls", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_0, -1), + BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = -1, + }, + /* Just make sure that JITs used udiv/umod as otherwise we get + * an exception from INT_MIN/-1 overflow similarly as with div + * by zero. + */ + { + "DIV32 overflow, check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV32 overflow, check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV64 overflow, check 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), + BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "DIV64 overflow, check 2", + .insns = { + BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), + BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 0, + }, + { + "MOD32 overflow, check 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = INT_MIN, + }, + { + "MOD32 overflow, check 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, INT_MIN), + BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = INT_MIN, + }, + { + "MOD64 overflow, check 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "MOD64 overflow, check 2", + .insns = { + BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_ALU64_IMM(BPF_MOD, BPF_REG_2, -1), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "xor32 zero extend check", + .insns = { + BPF_MOV32_IMM(BPF_REG_2, -1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32), + BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 0xffff), + BPF_ALU32_REG(BPF_XOR, BPF_REG_2, BPF_REG_2), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "empty prog", + .insns = { + }, + .errstr = "unknown opcode 00", + .result = REJECT, + }, + { + "only exit insn", + .insns = { + BPF_EXIT_INSN(), + }, + .errstr = "R0 !read_ok", + .result = REJECT, }, { "unreachable", @@ -209,6 +537,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 1, }, { "test8 ld_imm64", @@ -280,7 +609,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "BPF_ARSH not supported for 32 bit ALU", + .errstr = "unknown opcode c4", }, { "arsh32 on reg", @@ -291,7 +620,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "BPF_ARSH not supported for 32 bit ALU", + .errstr = "unknown opcode cc", }, { "arsh64 on imm", @@ -317,7 +646,7 @@ static struct bpf_test tests[] = { .insns = { BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2), }, - .errstr = "jump out of range", + .errstr = "not an exit", .result = REJECT, }, { @@ -407,7 +736,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0), BPF_EXIT_INSN(), }, - .errstr = "BPF_CALL uses reserved", + .errstr = "unknown opcode 8d", .result = REJECT, }, { @@ -516,6 +845,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, + .retval = POINTER_VALUE, }, { "check valid spill/fill, skb mark", @@ -596,7 +926,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(0, 0, 0, 0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_LD_IMM", + .errstr = "unknown opcode 00", .result = REJECT, }, { @@ -614,7 +944,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(-1, 0, 0, 0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_ALU opcode f0", + .errstr = "unknown opcode ff", .result = REJECT, }, { @@ -623,7 +953,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(-1, -1, -1, -1, -1), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_ALU opcode f0", + .errstr = "unknown opcode ff", .result = REJECT, }, { @@ -802,6 +1132,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R1 pointer comparison", .result_unpriv = REJECT, .result = ACCEPT, + .retval = -ENOENT, }, { "jump test 4", @@ -1266,6 +1597,60 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SK_SKB, }, { + "direct packet read for SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, data)), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, + offsetof(struct sk_msg_md, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "direct packet write for SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, data)), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, + offsetof(struct sk_msg_md, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "overlapping checks for direct packet access SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, data)), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, + offsetof(struct sk_msg_md, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { "check skb->mark is not writeable by sockets", .insns = { BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, @@ -1822,6 +2207,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 0xfaceb00c, }, { "PTR_TO_STACK store/load - bad alignment on off", @@ -1880,6 +2266,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, .result_unpriv = REJECT, .errstr_unpriv = "R0 leaks addr", + .retval = POINTER_VALUE, }, { "unpriv: add const to pointer", @@ -2053,6 +2440,7 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_hash_recalc), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .result = ACCEPT, @@ -2255,6 +2643,90 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { + "runtime/jit: tail_call within bounds, prog once", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_prog = { 1 }, + .result = ACCEPT, + .retval = 42, + }, + { + "runtime/jit: tail_call within bounds, prog loop", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_prog = { 1 }, + .result = ACCEPT, + .retval = 41, + }, + { + "runtime/jit: tail_call within bounds, no prog", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_prog = { 1 }, + .result = ACCEPT, + .retval = 1, + }, + { + "runtime/jit: tail_call out of bounds", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, 256), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .fixup_prog = { 1 }, + .result = ACCEPT, + .retval = 2, + }, + { + "runtime/jit: pass negative index to tail_call", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, -1), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .fixup_prog = { 1 }, + .result = ACCEPT, + .retval = 2, + }, + { + "runtime/jit: pass > 32bit index to tail_call", + .insns = { + BPF_LD_IMM64(BPF_REG_3, 0x100000000ULL), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .fixup_prog = { 2 }, + .result = ACCEPT, + .retval = 42, + }, + { "stack pointer arithmetic", .insns = { BPF_MOV64_IMM(BPF_REG_1, 4), @@ -2840,6 +3312,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test12 (and, good access)", @@ -2864,6 +3337,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test13 (branches, good access)", @@ -2894,6 +3368,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)", @@ -2917,6 +3392,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test15 (spill with xadd)", @@ -3203,6 +3679,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 1, }, { "direct packet access: test28 (marking on <=, bad access)", @@ -5700,7 +6177,7 @@ static struct bpf_test tests[] = { "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)", .insns = { BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), @@ -5822,6 +6299,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = 0 /* csum_diff of 64-byte packet */, }, { "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", @@ -5935,7 +6413,7 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), @@ -6190,6 +6668,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .retval = 42 /* ultimate return value */, }, { "ld_ind: check calling conv, r1", @@ -6261,6 +6740,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 1, }, { "check bpf_perf_event_data->sample_period byte load permitted", @@ -7248,6 +7728,7 @@ static struct bpf_test tests[] = { }, .fixup_map1 = { 3 }, .result = ACCEPT, + .retval = POINTER_VALUE, .result_unpriv = REJECT, .errstr_unpriv = "R0 leaks addr as return value" }, @@ -7268,6 +7749,7 @@ static struct bpf_test tests[] = { }, .fixup_map1 = { 3 }, .result = ACCEPT, + .retval = POINTER_VALUE, .result_unpriv = REJECT, .errstr_unpriv = "R0 leaks addr as return value" }, @@ -7434,10 +7916,24 @@ static struct bpf_test tests[] = { }, BPF_EXIT_INSN(), }, - .errstr = "BPF_END uses reserved fields", + .errstr = "unknown opcode d7", .result = REJECT, }, { + "XDP, using ifindex from netdev", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, ingress_ifindex)), + BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 1, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .retval = 1, + }, + { "meta access, test1", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, @@ -7709,6 +8205,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = TEST_DATA_LEN, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -8656,6 +9153,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .retval = 1, }, { "check deducing bounds from const, 3", @@ -8826,6 +9324,2105 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, }, + { + "calls: basic sanity", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: not on unpriviledged", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 1, + }, + { + "calls: div by 0 in subprog", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(BPF_REG_2, 0), + BPF_MOV32_IMM(BPF_REG_3, 1), + BPF_ALU32_REG(BPF_DIV, BPF_REG_3, BPF_REG_2), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "calls: multiple ret types in subprog 1", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV32_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", + }, + { + "calls: multiple ret types in subprog 2", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 9), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, + offsetof(struct __sk_buff, data)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 16 }, + .result = REJECT, + .errstr = "R0 min value is outside of the array range", + }, + { + "calls: overlapping caller/callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn is not an exit or jmp", + .result = REJECT, + }, + { + "calls: wrong recursive calls", + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: wrong src reg", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "BPF_CALL uses reserved fields", + .result = REJECT, + }, + { + "calls: wrong off value", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, -1, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "BPF_CALL uses reserved fields", + .result = REJECT, + }, + { + "calls: jump back loop", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn 0 to 0", + .result = REJECT, + }, + { + "calls: conditional call", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: conditional call 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: conditional call 3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, + { + "calls: conditional call 4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -5), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: conditional call 5", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, + { + "calls: conditional call 6", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, + { + "calls: using r0 returned by callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: using uninit r0 from callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "!read_ok", + .result = REJECT, + }, + { + "calls: callee is using r1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_ACT, + .result = ACCEPT, + .retval = TEST_DATA_LEN, + }, + { + "calls: callee using args1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = POINTER_VALUE, + }, + { + "calls: callee using wrong args2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "R2 !read_ok", + .result = REJECT, + }, + { + "calls: callee using two args", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN, + }, + { + "calls: callee changing pkt pointers", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_8, BPF_REG_7, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + /* clear_all_pkt_pointers() has to walk all frames + * to make sure that pkt pointers in the caller + * are cleared when callee is calling a helper that + * adjusts packet size + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_xdp_adjust_head), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R6 invalid mem access 'inv'", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "calls: two calls with args", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = TEST_DATA_LEN + TEST_DATA_LEN, + }, + { + "calls: calls with stack arith", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 42, + }, + { + "calls: calls with misaligned stack access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -61), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, + .errstr = "misaligned stack access", + .result = REJECT, + }, + { + "calls: calls control flow, jump test", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 43), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 43, + }, + { + "calls: calls control flow, jump test 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 43), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "jump out of range from insn 1 to 4", + .result = REJECT, + }, + { + "calls: two calls with bad jump", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range from insn 11 to 9", + .result = REJECT, + }, + { + "calls: recursive call. test1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge", + .result = REJECT, + }, + { + "calls: recursive call. test2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge", + .result = REJECT, + }, + { + "calls: unreachable code", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "unreachable insn 6", + .result = REJECT, + }, + { + "calls: invalid call", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -4), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "invalid destination", + .result = REJECT, + }, + { + "calls: invalid call 2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0x7fffffff), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "invalid destination", + .result = REJECT, + }, + { + "calls: jumping across function bodies. test1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: jumping across function bodies. test2", + .insns = { + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: call without exit", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -2), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "not an exit", + .result = REJECT, + }, + { + "calls: call into middle of ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn", + .result = REJECT, + }, + { + "calls: call into middle of other call", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn", + .result = REJECT, + }, + { + "calls: ld_abs with changing ctx data in callee", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_vlan_push), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed", + .result = REJECT, + }, + { + "calls: two calls with bad fallthrough", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "not an exit", + .result = REJECT, + }, + { + "calls: two calls with stack read", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: two calls with stack write", + .insns = { + /* main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 7), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_8), + /* write into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* read from stack frame of main prog */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: stack overflow using two frames (pre-call access)", + .insns = { + /* prog 1 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* prog 2 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "combined stack size", + .result = REJECT, + }, + { + "calls: stack overflow using two frames (post-call access)", + .insns = { + /* prog 1 */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 2), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + + /* prog 2 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "combined stack size", + .result = REJECT, + }, + { + "calls: stack depth check using three frames. test1", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=32, stack_A=256, stack_B=64 + * and max(main+A, main+A+B) < 512 + */ + .result = ACCEPT, + }, + { + "calls: stack depth check using three frames. test2", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=32, stack_A=64, stack_B=256 + * and max(main+A, main+A+B) < 512 + */ + .result = ACCEPT, + }, + { + "calls: stack depth check using three frames. test3", + .insns = { + /* main */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 8), /* call B */ + BPF_JMP_IMM(BPF_JGE, BPF_REG_6, 0, 1), + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 10, 1), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_B, BPF_REG_10, -224, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + /* B */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -6), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=64, stack_A=224, stack_B=256 + * and max(main+A, main+A+B) > 512 + */ + .errstr = "combined stack", + .result = REJECT, + }, + { + "calls: stack depth check using three frames. test4", + /* void main(void) { + * func1(0); + * func1(1); + * func2(1); + * } + * void func1(int alloc_or_recurse) { + * if (alloc_or_recurse) { + * frame_pointer[-300] = 1; + * } else { + * func2(alloc_or_recurse); + * } + * } + * void func2(int alloc_or_recurse) { + * if (alloc_or_recurse) { + * frame_pointer[-300] = 1; + * } + * } + */ + .insns = { + /* main */ + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 7), /* call B */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */ + BPF_EXIT_INSN(), + /* B */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = REJECT, + .errstr = "combined stack", + }, + { + "calls: stack depth check using three frames. test5", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call A */ + BPF_EXIT_INSN(), + /* A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */ + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call C */ + BPF_EXIT_INSN(), + /* C */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call D */ + BPF_EXIT_INSN(), + /* D */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call E */ + BPF_EXIT_INSN(), + /* E */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call F */ + BPF_EXIT_INSN(), + /* F */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call G */ + BPF_EXIT_INSN(), + /* G */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */ + BPF_EXIT_INSN(), + /* H */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "call stack", + .result = REJECT, + }, + { + "calls: spill into caller stack frame", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "cannot spill", + .result = REJECT, + }, + { + "calls: write into caller stack frame", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + .retval = 42, + }, + { + "calls: write into callee stack frame", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "cannot return stack pointer", + .result = REJECT, + }, + { + "calls: two calls with stack write and void return", + .insns = { + /* main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* write into stack frame of main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), + BPF_EXIT_INSN(), /* void return */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: ambiguous return value", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .errstr = "R0 !read_ok", + .result = REJECT, + }, + { + "calls: two calls that return map_value", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = ACCEPT, + }, + { + "calls: two calls that return map_value with bool condition", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), /* return 1 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = ACCEPT, + }, + { + "calls: two calls that return map_value with incorrect bool check", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), /* return 1 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = REJECT, + .errstr = "invalid read from stack off -16+0 size 8", + }, + { + "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */ + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */ + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=8 off=2 size=8", + }, + { + "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */ + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */ + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = ACCEPT, + }, + { + "calls: two jumps that receive map_value via arg=ptr_stack_of_jumper. test3", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), // 26 + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), // 30 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), // 34 + BPF_JMP_IMM(BPF_JA, 0, 0, -30), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -8), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=8 off=2 size=8", + }, + { + "calls: two calls that receive map_value_ptr_or_null via arg. test1", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = ACCEPT, + }, + { + "calls: two calls that receive map_value_ptr_or_null via arg. test2", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 0 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 0, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", + }, + { + "calls: pkt_ptr spill into caller stack", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = POINTER_VALUE, + }, + { + "calls: pkt_ptr spill into caller stack 2", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + /* Marking is still kept, but not in all cases safe. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid access to packet", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 3", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* Marking is still kept and safe here. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "calls: pkt_ptr spill into caller stack 4", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* Check marking propagated. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "calls: pkt_ptr spill into caller stack 5", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "same insn cannot be used with different", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R4 invalid mem access", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R4 invalid mem access", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 8", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: pkt_ptr spill into caller stack 9", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid access to packet", + .result = REJECT, + }, + { + "calls: caller stack init to zero or map_value_or_null", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + /* fetch map_value_or_null or const_zero from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* store into map_value */ + BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* if (ctx == 0) return; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8), + /* else bpf_map_lookup() and *(fp - 8) = r0 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 13 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "calls: stack init to zero and pruning", + .insns = { + /* first make allocated_stack 16 byte */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + /* now fork the execution such that the false branch + * of JGT insn will be verified second and it skisp zero + * init of fp-8 stack slot. If stack liveness marking + * is missing live_read marks from call map_lookup + * processing then pruning will incorrectly assume + * that fp-8 stack slot was unused in the fall-through + * branch and will accept the program incorrectly + */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 6 }, + .errstr = "invalid indirect read from stack off -8+0 size 8", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "search pruning: all branches should be verified (nop operation)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_A(1), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16), + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_5, 0, 2), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "R6 invalid mem access 'inv'", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "search pruning: all branches should be verified (invalid stack access)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16), + BPF_JMP_A(1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -24), + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "invalid read from stack off -16+0 size 8", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "jit: lsh, rsh, arsh by 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 0xff), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 1), + BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 1), + BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0xff, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x7f, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, + }, + { + "jit: mov32 for ldimm64, 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_LD_IMM64(BPF_REG_1, 0xfeffffffffffffffULL), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32), + BPF_LD_IMM64(BPF_REG_2, 0xfeffffffULL), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, + }, + { + "jit: mov32 for ldimm64, 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_IMM64(BPF_REG_1, 0x1ffffffffULL), + BPF_LD_IMM64(BPF_REG_2, 0xffffffffULL), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, + }, + { + "jit: various mul tests", + .insns = { + BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL), + BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL), + BPF_LD_IMM64(BPF_REG_1, 0xefefefULL), + BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL), + BPF_ALU64_REG(BPF_MUL, BPF_REG_3, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV32_REG(BPF_REG_2, BPF_REG_2), + BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL), + BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL), + BPF_ALU32_REG(BPF_MUL, BPF_REG_3, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL), + BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL), + BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL), + BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, + }, + { + "xadd/w check unaligned stack", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -7), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "misaligned stack access off", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "xadd/w check unaligned map", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_STX_XADD(BPF_W, BPF_REG_0, BPF_REG_1, 3), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .result = REJECT, + .errstr = "misaligned value access off", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "xadd/w check unaligned pkt", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 99), + BPF_JMP_IMM(BPF_JA, 0, 0, 6), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0), + BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 1), + BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 2), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "BPF_XADD stores into R2 packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -8850,16 +11447,61 @@ static int create_map(uint32_t size_value, uint32_t max_elem) return fd; } +static int create_prog_dummy1(void) +{ + struct bpf_insn prog[] = { + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }; + + return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, + ARRAY_SIZE(prog), "GPL", 0, NULL, 0); +} + +static int create_prog_dummy2(int mfd, int idx) +{ + struct bpf_insn prog[] = { + BPF_MOV64_IMM(BPF_REG_3, idx), + BPF_LD_MAP_FD(BPF_REG_2, mfd), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 41), + BPF_EXIT_INSN(), + }; + + return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, + ARRAY_SIZE(prog), "GPL", 0, NULL, 0); +} + static int create_prog_array(void) { - int fd; + int p1key = 0, p2key = 1; + int mfd, p1fd, p2fd; - fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int), - sizeof(int), 4, 0); - if (fd < 0) + mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int), + sizeof(int), 4, 0); + if (mfd < 0) { printf("Failed to create prog array '%s'!\n", strerror(errno)); + return -1; + } - return fd; + p1fd = create_prog_dummy1(); + p2fd = create_prog_dummy2(mfd, p2key); + if (p1fd < 0 || p2fd < 0) + goto out; + if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0) + goto out; + if (bpf_map_update_elem(mfd, &p2key, &p2fd, BPF_ANY) < 0) + goto out; + close(p2fd); + close(p1fd); + + return mfd; +out: + close(p2fd); + close(p1fd); + close(mfd); + return -1; } static int create_map_in_map(void) @@ -8937,10 +11579,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv, int fd_prog, expected_ret, reject_from_alignment; struct bpf_insn *prog = test->insns; int prog_len = probe_filter_length(prog); + char data_in[TEST_DATA_LEN] = {}; int prog_type = test->prog_type; int map_fds[MAX_NR_MAPS]; const char *expected_err; - int i; + uint32_t retval; + int i, err; for (i = 0; i < MAX_NR_MAPS; i++) map_fds[i] = -1; @@ -8978,11 +11622,25 @@ static void do_test_single(struct bpf_test *test, bool unpriv, goto fail_log; } if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) { - printf("FAIL\nUnexpected error message!\n"); + printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n", + expected_err, bpf_vlog); goto fail_log; } } + if (fd_prog >= 0) { + err = bpf_prog_test_run(fd_prog, 1, data_in, sizeof(data_in), + NULL, NULL, &retval, NULL); + if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { + printf("Unexpected bpf_prog_test_run error\n"); + goto fail_log; + } + if (!err && retval != test->retval && + test->retval != POINTER_VALUE) { + printf("FAIL retval %d != %d\n", retval, test->retval); + goto fail_log; + } + } (*passes)++; printf("OK%s\n", reject_from_alignment ? " (NOTE: reject due to unknown alignment)" : ""); @@ -9049,9 +11707,20 @@ out: return ret; } +static void get_unpriv_disabled() +{ + char buf[2]; + FILE *fd; + + fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r"); + if (fgets(buf, 2, fd) == buf && atoi(buf)) + unpriv_disabled = true; + fclose(fd); +} + static int do_test(bool unpriv, unsigned int from, unsigned int to) { - int i, passes = 0, errors = 0; + int i, passes = 0, errors = 0, skips = 0; for (i = from; i < to; i++) { struct bpf_test *test = &tests[i]; @@ -9059,7 +11728,10 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to) /* Program types that are not supported by non-root we * skip right away. */ - if (!test->prog_type) { + if (!test->prog_type && unpriv_disabled) { + printf("#%d/u %s SKIP\n", i, test->descr); + skips++; + } else if (!test->prog_type) { if (!unpriv) set_admin(false); printf("#%d/u %s ", i, test->descr); @@ -9068,20 +11740,22 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to) set_admin(true); } - if (!unpriv) { + if (unpriv) { + printf("#%d/p %s SKIP\n", i, test->descr); + skips++; + } else { printf("#%d/p %s ", i, test->descr); do_test_single(test, false, &passes, &errors); } } - printf("Summary: %d PASSED, %d FAILED\n", passes, errors); + printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes, + skips, errors); return errors ? EXIT_FAILURE : EXIT_SUCCESS; } int main(int argc, char **argv) { - struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; - struct rlimit rlim = { 1 << 20, 1 << 20 }; unsigned int from = 0, to = ARRAY_SIZE(tests); bool unpriv = !is_admin(); @@ -9102,6 +11776,12 @@ int main(int argc, char **argv) } } - setrlimit(RLIMIT_MEMLOCK, unpriv ? &rlim : &rinf); + get_unpriv_disabled(); + if (unpriv && unpriv_disabled) { + printf("Cannot run as unprivileged user with sysctl %s.\n", + UNPRIV_SYSCTL); + return EXIT_FAILURE; + } + return do_test(unpriv, from, to); } diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c index e9626cf5607a..8d6918c3b4a2 100644 --- a/tools/testing/selftests/bpf/test_verifier_log.c +++ b/tools/testing/selftests/bpf/test_verifier_log.c @@ -4,7 +4,6 @@ #include <string.h> #include <unistd.h> #include <sys/time.h> -#include <sys/resource.h> #include <linux/bpf.h> #include <linux/filter.h> @@ -12,6 +11,8 @@ #include <bpf/bpf.h> +#include "bpf_rlimit.h" + #define LOG_SIZE (1 << 20) #define err(str...) printf("ERROR: " str) @@ -133,16 +134,11 @@ static void test_log_bad(char *log, size_t log_len, int log_level) int main(int argc, char **argv) { - struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; char full_log[LOG_SIZE]; char log[LOG_SIZE]; size_t want_len; int i; - /* allow unlimited locked memory to have more consistent error code */ - if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0) - perror("Unable to lift memlock rlimit"); - memset(log, 1, LOG_SIZE); /* Test incorrect attr */ diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh index 307aa856cee3..637fcf4fe4e3 100755 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ b/tools/testing/selftests/bpf/test_xdp_meta.sh @@ -9,6 +9,7 @@ cleanup() fi set +e + ip link del veth1 2> /dev/null ip netns del ns1 2> /dev/null ip netns del ns2 2> /dev/null } diff --git a/tools/testing/selftests/bpf/test_xdp_noinline.c b/tools/testing/selftests/bpf/test_xdp_noinline.c new file mode 100644 index 000000000000..5e4aac74f9d0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_noinline.c @@ -0,0 +1,833 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include "bpf_helpers.h" + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +static __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static __attribute__ ((noinline)) +u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static __attribute__ ((noinline)) +u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static __attribute__ ((noinline)) +u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +struct flow_key { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; +}; + +struct packet_description { + struct flow_key flow; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_definition { + union { + __be32 vip; + __be32 vipv6[4]; + }; + __u16 port; + __u16 family; + __u8 proto; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_pos_lru { + __u32 pos; + __u64 atime; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct lb_stats { + __u64 v2; + __u64 v1; +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip_definition), + .value_size = sizeof(struct vip_meta), + .max_entries = 512, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = { + .type = BPF_MAP_TYPE_LRU_HASH, + .key_size = sizeof(struct flow_key), + .value_size = sizeof(struct real_pos_lru), + .max_entries = 300, + .map_flags = 1U << 1, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 12 * 655, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) reals = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct real_definition), + .max_entries = 40, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) stats = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct lb_stats), + .max_entries = 515, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct ctl_value), + .max_entries = 16, + .map_flags = 0, +}; + +struct eth_hdr { + unsigned char eth_dest[6]; + unsigned char eth_source[6]; + unsigned short eth_proto; +}; + +static inline __u64 calc_offset(bool is_ipv6, bool is_icmp) +{ + __u64 off = sizeof(struct eth_hdr); + if (is_ipv6) { + off += sizeof(struct ipv6hdr); + if (is_icmp) + off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr); + } else { + off += sizeof(struct iphdr); + if (is_icmp) + off += sizeof(struct icmphdr) + sizeof(struct iphdr); + } + return off; +} + +static __attribute__ ((noinline)) +bool parse_udp(void *data, void *data_end, + bool is_ipv6, struct packet_description *pckt) +{ + + bool is_icmp = !((pckt->flags & (1 << 0)) == 0); + __u64 off = calc_offset(is_ipv6, is_icmp); + struct udphdr *udp; + udp = data + off; + + if (udp + 1 > data_end) + return 0; + if (!is_icmp) { + pckt->flow.port16[0] = udp->source; + pckt->flow.port16[1] = udp->dest; + } else { + pckt->flow.port16[0] = udp->dest; + pckt->flow.port16[1] = udp->source; + } + return 1; +} + +static __attribute__ ((noinline)) +bool parse_tcp(void *data, void *data_end, + bool is_ipv6, struct packet_description *pckt) +{ + + bool is_icmp = !((pckt->flags & (1 << 0)) == 0); + __u64 off = calc_offset(is_ipv6, is_icmp); + struct tcphdr *tcp; + + tcp = data + off; + if (tcp + 1 > data_end) + return 0; + if (tcp->syn) + pckt->flags |= (1 << 1); + if (!is_icmp) { + pckt->flow.port16[0] = tcp->source; + pckt->flow.port16[1] = tcp->dest; + } else { + pckt->flow.port16[0] = tcp->dest; + pckt->flow.port16[1] = tcp->source; + } + return 1; +} + +static __attribute__ ((noinline)) +bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, + struct packet_description *pckt, + struct real_definition *dst, __u32 pkt_bytes) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + struct ipv6hdr *ip6h; + __u32 ip_suffix; + void *data_end; + void *data; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) + return 0; + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + new_eth = data; + ip6h = data + sizeof(struct eth_hdr); + old_eth = data + sizeof(struct ipv6hdr); + if (new_eth + 1 > data_end || + old_eth + 1 > data_end || ip6h + 1 > data_end) + return 0; + memcpy(new_eth->eth_dest, cval->mac, 6); + memcpy(new_eth->eth_source, old_eth->eth_dest, 6); + new_eth->eth_proto = 56710; + ip6h->version = 6; + ip6h->priority = 0; + memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); + + ip6h->nexthdr = IPPROTO_IPV6; + ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0]; + ip6h->payload_len = + __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr)); + ip6h->hop_limit = 4; + + ip6h->saddr.in6_u.u6_addr32[0] = 1; + ip6h->saddr.in6_u.u6_addr32[1] = 2; + ip6h->saddr.in6_u.u6_addr32[2] = 3; + ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix; + memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16); + return 1; +} + +static __attribute__ ((noinline)) +bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, + struct packet_description *pckt, + struct real_definition *dst, __u32 pkt_bytes) +{ + + __u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]); + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + __u16 *next_iph_u16; + struct iphdr *iph; + __u32 csum = 0; + void *data_end; + void *data; + + ip_suffix <<= 15; + ip_suffix ^= pckt->flow.src; + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) + return 0; + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + new_eth = data; + iph = data + sizeof(struct eth_hdr); + old_eth = data + sizeof(struct iphdr); + if (new_eth + 1 > data_end || + old_eth + 1 > data_end || iph + 1 > data_end) + return 0; + memcpy(new_eth->eth_dest, cval->mac, 6); + memcpy(new_eth->eth_source, old_eth->eth_dest, 6); + new_eth->eth_proto = 8; + iph->version = 4; + iph->ihl = 5; + iph->frag_off = 0; + iph->protocol = IPPROTO_IPIP; + iph->check = 0; + iph->tos = 1; + iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr)); + /* don't update iph->daddr, since it will overwrite old eth_proto + * and multiple iterations of bpf_prog_run() will fail + */ + + iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst; + iph->ttl = 4; + + next_iph_u16 = (__u16 *) iph; +#pragma clang loop unroll(full) + for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) + csum += *next_iph_u16++; + iph->check = ~((csum & 0xffff) + (csum >> 16)); + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) + return 0; + return 1; +} + +static __attribute__ ((noinline)) +bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + + old_eth = *data; + new_eth = *data + sizeof(struct ipv6hdr); + memcpy(new_eth->eth_source, old_eth->eth_source, 6); + memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); + if (inner_v4) + new_eth->eth_proto = 8; + else + new_eth->eth_proto = 56710; + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr))) + return 0; + *data = (void *)(long)xdp->data; + *data_end = (void *)(long)xdp->data_end; + return 1; +} + +static __attribute__ ((noinline)) +bool decap_v4(struct xdp_md *xdp, void **data, void **data_end) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + + old_eth = *data; + new_eth = *data + sizeof(struct iphdr); + memcpy(new_eth->eth_source, old_eth->eth_source, 6); + memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); + new_eth->eth_proto = 8; + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) + return 0; + *data = (void *)(long)xdp->data; + *data_end = (void *)(long)xdp->data_end; + return 1; +} + +static __attribute__ ((noinline)) +int swap_mac_and_send(void *data, void *data_end) +{ + unsigned char tmp_mac[6]; + struct eth_hdr *eth; + + eth = data; + memcpy(tmp_mac, eth->eth_source, 6); + memcpy(eth->eth_source, eth->eth_dest, 6); + memcpy(eth->eth_dest, tmp_mac, 6); + return XDP_TX; +} + +static __attribute__ ((noinline)) +int send_icmp_reply(void *data, void *data_end) +{ + struct icmphdr *icmp_hdr; + __u16 *next_iph_u16; + __u32 tmp_addr = 0; + struct iphdr *iph; + __u32 csum1 = 0; + __u32 csum = 0; + __u64 off = 0; + + if (data + sizeof(struct eth_hdr) + + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end) + return XDP_DROP; + off += sizeof(struct eth_hdr); + iph = data + off; + off += sizeof(struct iphdr); + icmp_hdr = data + off; + icmp_hdr->type = 0; + icmp_hdr->checksum += 0x0007; + iph->ttl = 4; + tmp_addr = iph->daddr; + iph->daddr = iph->saddr; + iph->saddr = tmp_addr; + iph->check = 0; + next_iph_u16 = (__u16 *) iph; +#pragma clang loop unroll(full) + for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) + csum += *next_iph_u16++; + iph->check = ~((csum & 0xffff) + (csum >> 16)); + return swap_mac_and_send(data, data_end); +} + +static __attribute__ ((noinline)) +int send_icmp6_reply(void *data, void *data_end) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + __be32 tmp_addr[4]; + __u64 off = 0; + + if (data + sizeof(struct eth_hdr) + + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end) + return XDP_DROP; + off += sizeof(struct eth_hdr); + ip6h = data + off; + off += sizeof(struct ipv6hdr); + icmp_hdr = data + off; + icmp_hdr->icmp6_type = 129; + icmp_hdr->icmp6_cksum -= 0x0001; + ip6h->hop_limit = 4; + memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16); + memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16); + memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16); + return swap_mac_and_send(data, data_end); +} + +static __attribute__ ((noinline)) +int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return XDP_DROP; + if (icmp_hdr->icmp6_type == 128) + return send_icmp6_reply(data, data_end); + if (icmp_hdr->icmp6_type != 3) + return XDP_PASS; + off += sizeof(struct icmp6hdr); + ip6h = data + off; + if (ip6h + 1 > data_end) + return XDP_DROP; + pckt->flow.proto = ip6h->nexthdr; + pckt->flags |= (1 << 0); + memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16); + memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16); + return -1; +} + +static __attribute__ ((noinline)) +int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return XDP_DROP; + if (icmp_hdr->type == 8) + return send_icmp_reply(data, data_end); + if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4)) + return XDP_PASS; + off += sizeof(struct icmphdr); + iph = data + off; + if (iph + 1 > data_end) + return XDP_DROP; + if (iph->ihl != 5) + return XDP_DROP; + pckt->flow.proto = iph->protocol; + pckt->flags |= (1 << 0); + pckt->flow.src = iph->daddr; + pckt->flow.dst = iph->saddr; + return -1; +} + +static __attribute__ ((noinline)) +__u32 get_packet_hash(struct packet_description *pckt, + bool hash_16bytes) +{ + if (hash_16bytes) + return jhash_2words(jhash(pckt->flow.srcv6, 16, 12), + pckt->flow.ports, 24); + else + return jhash_2words(pckt->flow.src, pckt->flow.ports, + 24); +} + +__attribute__ ((noinline)) +static bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6, void *lru_map) +{ + struct real_pos_lru new_dst_lru = { }; + bool hash_16bytes = is_ipv6; + __u32 *real_pos, hash, key; + __u64 cur_time; + + if (vip_info->flags & (1 << 2)) + hash_16bytes = 1; + if (vip_info->flags & (1 << 3)) { + pckt->flow.port16[0] = pckt->flow.port16[1]; + memset(pckt->flow.srcv6, 0, 16); + } + hash = get_packet_hash(pckt, hash_16bytes); + if (hash != 0x358459b7 /* jhash of ipv4 packet */ && + hash != 0x2f4bc6bb /* jhash of ipv6 packet */) + return 0; + key = 2 * vip_info->vip_num + hash % 2; + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return 0; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return 0; + if (!(vip_info->flags & (1 << 1))) { + __u32 conn_rate_key = 512 + 2; + struct lb_stats *conn_rate_stats = + bpf_map_lookup_elem(&stats, &conn_rate_key); + + if (!conn_rate_stats) + return 1; + cur_time = bpf_ktime_get_ns(); + if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) { + conn_rate_stats->v1 = 1; + conn_rate_stats->v2 = cur_time; + } else { + conn_rate_stats->v1 += 1; + if (conn_rate_stats->v1 >= 1) + return 1; + } + if (pckt->flow.proto == IPPROTO_UDP) + new_dst_lru.atime = cur_time; + new_dst_lru.pos = key; + bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0); + } + return 1; +} + +__attribute__ ((noinline)) +static void connection_table_lookup(struct real_definition **real, + struct packet_description *pckt, + void *lru_map) +{ + + struct real_pos_lru *dst_lru; + __u64 cur_time; + __u32 key; + + dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow); + if (!dst_lru) + return; + if (pckt->flow.proto == IPPROTO_UDP) { + cur_time = bpf_ktime_get_ns(); + if (cur_time - dst_lru->atime > 300000) + return; + dst_lru->atime = cur_time; + } + key = dst_lru->pos; + *real = bpf_map_lookup_elem(&reals, &key); +} + +/* don't believe your eyes! + * below function has 6 arguments whereas bpf and llvm allow maximum of 5 + * but since it's _static_ llvm can optimize one argument away + */ +__attribute__ ((noinline)) +static int process_l3_headers_v6(struct packet_description *pckt, + __u8 *protocol, __u64 off, + __u16 *pkt_bytes, void *data, + void *data_end) +{ + struct ipv6hdr *ip6h; + __u64 iph_len; + int action; + + ip6h = data + off; + if (ip6h + 1 > data_end) + return XDP_DROP; + iph_len = sizeof(struct ipv6hdr); + *protocol = ip6h->nexthdr; + pckt->flow.proto = *protocol; + *pkt_bytes = __builtin_bswap16(ip6h->payload_len); + off += iph_len; + if (*protocol == 45) { + return XDP_DROP; + } else if (*protocol == 59) { + action = parse_icmpv6(data, data_end, off, pckt); + if (action >= 0) + return action; + } else { + memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16); + memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16); + } + return -1; +} + +__attribute__ ((noinline)) +static int process_l3_headers_v4(struct packet_description *pckt, + __u8 *protocol, __u64 off, + __u16 *pkt_bytes, void *data, + void *data_end) +{ + struct iphdr *iph; + __u64 iph_len; + int action; + + iph = data + off; + if (iph + 1 > data_end) + return XDP_DROP; + if (iph->ihl != 5) + return XDP_DROP; + *protocol = iph->protocol; + pckt->flow.proto = *protocol; + *pkt_bytes = __builtin_bswap16(iph->tot_len); + off += 20; + if (iph->frag_off & 65343) + return XDP_DROP; + if (*protocol == IPPROTO_ICMP) { + action = parse_icmp(data, data_end, off, pckt); + if (action >= 0) + return action; + } else { + pckt->flow.src = iph->saddr; + pckt->flow.dst = iph->daddr; + } + return -1; +} + +__attribute__ ((noinline)) +static int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct xdp_md *xdp) +{ + + struct real_definition *dst = NULL; + struct packet_description pckt = { }; + struct vip_definition vip = { }; + struct lb_stats *data_stats; + struct eth_hdr *eth = data; + void *lru_map = &lru_cache; + struct vip_meta *vip_info; + __u32 lru_stats_key = 513; + __u32 mac_addr_pos = 0; + __u32 stats_key = 512; + struct ctl_value *cval; + __u16 pkt_bytes; + __u64 iph_len; + __u8 protocol; + __u32 vip_num; + int action; + + if (is_ipv6) + action = process_l3_headers_v6(&pckt, &protocol, off, + &pkt_bytes, data, data_end); + else + action = process_l3_headers_v4(&pckt, &protocol, off, + &pkt_bytes, data, data_end); + if (action >= 0) + return action; + protocol = pckt.flow.proto; + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(data, data_end, is_ipv6, &pckt)) + return XDP_DROP; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(data, data_end, is_ipv6, &pckt)) + return XDP_DROP; + } else { + return XDP_TX; + } + + if (is_ipv6) + memcpy(vip.vipv6, pckt.flow.dstv6, 16); + else + vip.vip = pckt.flow.dst; + vip.port = pckt.flow.port16[1]; + vip.proto = pckt.flow.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.port = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return XDP_PASS; + if (!(vip_info->flags & (1 << 4))) + pckt.flow.port16[1] = 0; + } + if (data_end - data > 1400) + return XDP_DROP; + data_stats = bpf_map_lookup_elem(&stats, &stats_key); + if (!data_stats) + return XDP_DROP; + data_stats->v1 += 1; + if (!dst) { + if (vip_info->flags & (1 << 0)) + pckt.flow.port16[0] = 0; + if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1))) + connection_table_lookup(&dst, &pckt, lru_map); + if (dst) + goto out; + if (pckt.flow.proto == IPPROTO_TCP) { + struct lb_stats *lru_stats = + bpf_map_lookup_elem(&stats, &lru_stats_key); + + if (!lru_stats) + return XDP_DROP; + if (pckt.flags & (1 << 1)) + lru_stats->v1 += 1; + else + lru_stats->v2 += 1; + } + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map)) + return XDP_DROP; + data_stats->v2 += 1; + } +out: + cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos); + if (!cval) + return XDP_DROP; + if (dst->flags & (1 << 0)) { + if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes)) + return XDP_DROP; + } else { + if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes)) + return XDP_DROP; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return XDP_DROP; + data_stats->v1 += 1; + data_stats->v2 += pkt_bytes; + + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + if (data + 4 > data_end) + return XDP_DROP; + *(u32 *)data = dst->dst; + return XDP_DROP; +} + +__attribute__ ((section("xdp-test"), used)) +int balancer_ingress(struct xdp_md *ctx) +{ + void *data = (void *)(long)ctx->data; + void *data_end = (void *)(long)ctx->data_end; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return XDP_DROP; + eth_proto = eth->eth_proto; + if (eth_proto == 8) + return process_packet(data, nh_off, data_end, 0, ctx); + else if (eth_proto == 56710) + return process_packet(data, nh_off, data_end, 1, ctx); + else + return XDP_DROP; +} + +char _license[] __attribute__ ((section("license"), used)) = "GPL"; +int _version __attribute__ ((section("version"), used)) = 1; diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh index 344a3656dea6..c4b17e08d431 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh @@ -19,6 +19,8 @@ cleanup() fi set +e + ip link del veth1 2> /dev/null + ip link del veth2 2> /dev/null ip netns del ns1 2> /dev/null ip netns del ns2 2> /dev/null } diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c new file mode 100644 index 000000000000..4acfdebf36fa --- /dev/null +++ b/tools/testing/selftests/bpf/urandom_read.c @@ -0,0 +1,22 @@ +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdlib.h> + +#define BUF_SIZE 256 +int main(void) +{ + int fd = open("/dev/urandom", O_RDONLY); + int i; + char buf[BUF_SIZE]; + + if (fd < 0) + return 1; + for (i = 0; i < 4; ++i) + read(fd, buf, BUF_SIZE); + + close(fd); + return 0; +} diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore index 31d6e426b6d4..8449cf6716ce 100644 --- a/tools/testing/selftests/filesystems/.gitignore +++ b/tools/testing/selftests/filesystems/.gitignore @@ -1 +1,2 @@ dnotify_test +devpts_pts diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index 13a73bf725b5..4e6d09fb166f 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := dnotify_test +TEST_PROGS := dnotify_test devpts_pts all: $(TEST_PROGS) include ../lib.mk diff --git a/tools/testing/selftests/filesystems/devpts_pts.c b/tools/testing/selftests/filesystems/devpts_pts.c new file mode 100644 index 000000000000..b9055e974289 --- /dev/null +++ b/tools/testing/selftests/filesystems/devpts_pts.c @@ -0,0 +1,313 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <sys/wait.h> + +static bool terminal_dup2(int duplicate, int original) +{ + int ret; + + ret = dup2(duplicate, original); + if (ret < 0) + return false; + + return true; +} + +static int terminal_set_stdfds(int fd) +{ + int i; + + if (fd < 0) + return 0; + + for (i = 0; i < 3; i++) + if (!terminal_dup2(fd, (int[]){STDIN_FILENO, STDOUT_FILENO, + STDERR_FILENO}[i])) + return -1; + + return 0; +} + +static int login_pty(int fd) +{ + int ret; + + setsid(); + + ret = ioctl(fd, TIOCSCTTY, NULL); + if (ret < 0) + return -1; + + ret = terminal_set_stdfds(fd); + if (ret < 0) + return -1; + + if (fd > STDERR_FILENO) + close(fd); + + return 0; +} + +static int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + return -1; + } + if (ret != pid) + goto again; + + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) + return -1; + + return 0; +} + +static int resolve_procfd_symlink(int fd, char *buf, size_t buflen) +{ + int ret; + char procfd[4096]; + + ret = snprintf(procfd, 4096, "/proc/self/fd/%d", fd); + if (ret < 0 || ret >= 4096) + return -1; + + ret = readlink(procfd, buf, buflen); + if (ret < 0 || (size_t)ret >= buflen) + return -1; + + buf[ret] = '\0'; + + return 0; +} + +static int do_tiocgptpeer(char *ptmx, char *expected_procfd_contents) +{ + int ret; + int master = -1, slave = -1, fret = -1; + + master = open(ptmx, O_RDWR | O_NOCTTY | O_CLOEXEC); + if (master < 0) { + fprintf(stderr, "Failed to open \"%s\": %s\n", ptmx, + strerror(errno)); + return -1; + } + + /* + * grantpt() makes assumptions about /dev/pts/ so ignore it. It's also + * not really needed. + */ + ret = unlockpt(master); + if (ret < 0) { + fprintf(stderr, "Failed to unlock terminal\n"); + goto do_cleanup; + } + +#ifdef TIOCGPTPEER + slave = ioctl(master, TIOCGPTPEER, O_RDWR | O_NOCTTY | O_CLOEXEC); +#endif + if (slave < 0) { + if (errno == EINVAL) { + fprintf(stderr, "TIOCGPTPEER is not supported. " + "Skipping test.\n"); + fret = EXIT_SUCCESS; + } + + fprintf(stderr, "Failed to perform TIOCGPTPEER ioctl\n"); + goto do_cleanup; + } + + pid_t pid = fork(); + if (pid < 0) + goto do_cleanup; + + if (pid == 0) { + char buf[4096]; + + ret = login_pty(slave); + if (ret < 0) { + fprintf(stderr, "Failed to setup terminal\n"); + _exit(EXIT_FAILURE); + } + + ret = resolve_procfd_symlink(STDIN_FILENO, buf, sizeof(buf)); + if (ret < 0) { + fprintf(stderr, "Failed to retrieve pathname of pts " + "slave file descriptor\n"); + _exit(EXIT_FAILURE); + } + + if (strncmp(expected_procfd_contents, buf, + strlen(expected_procfd_contents)) != 0) { + fprintf(stderr, "Received invalid contents for " + "\"/proc/<pid>/fd/%d\" symlink: %s\n", + STDIN_FILENO, buf); + _exit(-1); + } + + fprintf(stderr, "Contents of \"/proc/<pid>/fd/%d\" " + "symlink are valid: %s\n", STDIN_FILENO, buf); + + _exit(EXIT_SUCCESS); + } + + ret = wait_for_pid(pid); + if (ret < 0) + goto do_cleanup; + + fret = EXIT_SUCCESS; + +do_cleanup: + if (master >= 0) + close(master); + if (slave >= 0) + close(slave); + + return fret; +} + +static int verify_non_standard_devpts_mount(void) +{ + char *mntpoint; + int ret = -1; + char devpts[] = P_tmpdir "/devpts_fs_XXXXXX"; + char ptmx[] = P_tmpdir "/devpts_fs_XXXXXX/ptmx"; + + ret = umount("/dev/pts"); + if (ret < 0) { + fprintf(stderr, "Failed to unmount \"/dev/pts\": %s\n", + strerror(errno)); + return -1; + } + + (void)umount("/dev/ptmx"); + + mntpoint = mkdtemp(devpts); + if (!mntpoint) { + fprintf(stderr, "Failed to create temporary mountpoint: %s\n", + strerror(errno)); + return -1; + } + + ret = mount("devpts", mntpoint, "devpts", MS_NOSUID | MS_NOEXEC, + "newinstance,ptmxmode=0666,mode=0620,gid=5"); + if (ret < 0) { + fprintf(stderr, "Failed to mount devpts fs to \"%s\" in new " + "mount namespace: %s\n", mntpoint, + strerror(errno)); + unlink(mntpoint); + return -1; + } + + ret = snprintf(ptmx, sizeof(ptmx), "%s/ptmx", devpts); + if (ret < 0 || (size_t)ret >= sizeof(ptmx)) { + unlink(mntpoint); + return -1; + } + + ret = do_tiocgptpeer(ptmx, mntpoint); + unlink(mntpoint); + if (ret < 0) + return -1; + + return 0; +} + +static int verify_ptmx_bind_mount(void) +{ + int ret; + + ret = mount("/dev/pts/ptmx", "/dev/ptmx", NULL, MS_BIND, NULL); + if (ret < 0) { + fprintf(stderr, "Failed to bind mount \"/dev/pts/ptmx\" to " + "\"/dev/ptmx\" mount namespace\n"); + return -1; + } + + ret = do_tiocgptpeer("/dev/ptmx", "/dev/pts/"); + if (ret < 0) + return -1; + + return 0; +} + +static int verify_invalid_ptmx_bind_mount(void) +{ + int ret; + char mntpoint_fd; + char ptmx[] = P_tmpdir "/devpts_ptmx_XXXXXX"; + + mntpoint_fd = mkstemp(ptmx); + if (mntpoint_fd < 0) { + fprintf(stderr, "Failed to create temporary directory: %s\n", + strerror(errno)); + return -1; + } + + ret = mount("/dev/pts/ptmx", ptmx, NULL, MS_BIND, NULL); + close(mntpoint_fd); + if (ret < 0) { + fprintf(stderr, "Failed to bind mount \"/dev/pts/ptmx\" to " + "\"%s\" mount namespace\n", ptmx); + return -1; + } + + ret = do_tiocgptpeer(ptmx, "/dev/pts/"); + if (ret == 0) + return -1; + + return 0; +} + +int main(int argc, char *argv[]) +{ + int ret; + + if (!isatty(STDIN_FILENO)) { + fprintf(stderr, "Standard input file desciptor is not attached " + "to a terminal. Skipping test\n"); + exit(EXIT_FAILURE); + } + + ret = unshare(CLONE_NEWNS); + if (ret < 0) { + fprintf(stderr, "Failed to unshare mount namespace\n"); + exit(EXIT_FAILURE); + } + + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); + if (ret < 0) { + fprintf(stderr, "Failed to make \"/\" MS_PRIVATE in new mount " + "namespace\n"); + exit(EXIT_FAILURE); + } + + ret = verify_ptmx_bind_mount(); + if (ret < 0) + exit(EXIT_FAILURE); + + ret = verify_invalid_ptmx_bind_mount(); + if (ret < 0) + exit(EXIT_FAILURE); + + ret = verify_non_standard_devpts_mount(); + if (ret < 0) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); +} diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile index 1894d625af2d..826f38d5dd19 100644 --- a/tools/testing/selftests/firmware/Makefile +++ b/tools/testing/selftests/firmware/Makefile @@ -3,7 +3,7 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -TEST_PROGS := fw_filesystem.sh fw_fallback.sh +TEST_PROGS := fw_run_tests.sh include ../lib.mk diff --git a/tools/testing/selftests/firmware/config b/tools/testing/selftests/firmware/config index c8137f70e291..bf634dda0720 100644 --- a/tools/testing/selftests/firmware/config +++ b/tools/testing/selftests/firmware/config @@ -1 +1,5 @@ CONFIG_TEST_FIRMWARE=y +CONFIG_FW_LOADER=y +CONFIG_FW_LOADER_USER_HELPER=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh index 34a42c68ebfb..8e2e34a2ca69 100755 --- a/tools/testing/selftests/firmware/fw_fallback.sh +++ b/tools/testing/selftests/firmware/fw_fallback.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # This validates that the kernel will fall back to using the fallback mechanism # to load firmware it can't find on disk itself. We must request a firmware @@ -6,31 +6,17 @@ # won't find so that we can do the load ourself manually. set -e -modprobe test_firmware +TEST_REQS_FW_SYSFS_FALLBACK="yes" +TEST_REQS_FW_SET_CUSTOM_PATH="no" +TEST_DIR=$(dirname $0) +source $TEST_DIR/fw_lib.sh -DIR=/sys/devices/virtual/misc/test_firmware +check_mods +check_setup +verify_reqs +setup_tmp_file -# CONFIG_FW_LOADER_USER_HELPER has a sysfs class under /sys/class/firmware/ -# These days no one enables CONFIG_FW_LOADER_USER_HELPER so check for that -# as an indicator for CONFIG_FW_LOADER_USER_HELPER. -HAS_FW_LOADER_USER_HELPER=$(if [ -d /sys/class/firmware/ ]; then echo yes; else echo no; fi) - -if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then - OLD_TIMEOUT=$(cat /sys/class/firmware/timeout) -else - echo "usermode helper disabled so ignoring test" - exit 0 -fi - -FWPATH=$(mktemp -d) -FW="$FWPATH/test-firmware.bin" - -test_finish() -{ - echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout - rm -f "$FW" - rmdir "$FWPATH" -} +trap "test_finish" EXIT load_fw() { @@ -169,102 +155,129 @@ load_fw_fallback_with_child() return $RET } -trap "test_finish" EXIT - -# This is an unlikely real-world firmware content. :) -echo "ABCD0123" >"$FW" -NAME=$(basename "$FW") - -DEVPATH="$DIR"/"nope-$NAME"/loading - -# Test failure when doing nothing (timeout works). -echo -n 2 >/sys/class/firmware/timeout -echo -n "nope-$NAME" >"$DIR"/trigger_request 2>/dev/null & - -# Give the kernel some time to load the loading file, must be less -# than the timeout above. -sleep 1 -if [ ! -f $DEVPATH ]; then - echo "$0: fallback mechanism immediately cancelled" - echo "" - echo "The file never appeared: $DEVPATH" - echo "" - echo "This might be a distribution udev rule setup by your distribution" - echo "to immediately cancel all fallback requests, this must be" - echo "removed before running these tests. To confirm look for" - echo "a firmware rule like /lib/udev/rules.d/50-firmware.rules" - echo "and see if you have something like this:" - echo "" - echo "SUBSYSTEM==\"firmware\", ACTION==\"add\", ATTR{loading}=\"-1\"" - echo "" - echo "If you do remove this file or comment out this line before" - echo "proceeding with these tests." - exit 1 -fi - -if diff -q "$FW" /dev/test_firmware >/dev/null ; then - echo "$0: firmware was not expected to match" >&2 - exit 1 -else - echo "$0: timeout works" -fi +test_syfs_timeout() +{ + DEVPATH="$DIR"/"nope-$NAME"/loading + + # Test failure when doing nothing (timeout works). + echo -n 2 >/sys/class/firmware/timeout + echo -n "nope-$NAME" >"$DIR"/trigger_request 2>/dev/null & + + # Give the kernel some time to load the loading file, must be less + # than the timeout above. + sleep 1 + if [ ! -f $DEVPATH ]; then + echo "$0: fallback mechanism immediately cancelled" + echo "" + echo "The file never appeared: $DEVPATH" + echo "" + echo "This might be a distribution udev rule setup by your distribution" + echo "to immediately cancel all fallback requests, this must be" + echo "removed before running these tests. To confirm look for" + echo "a firmware rule like /lib/udev/rules.d/50-firmware.rules" + echo "and see if you have something like this:" + echo "" + echo "SUBSYSTEM==\"firmware\", ACTION==\"add\", ATTR{loading}=\"-1\"" + echo "" + echo "If you do remove this file or comment out this line before" + echo "proceeding with these tests." + exit 1 + fi -# Put timeout high enough for us to do work but not so long that failures -# slow down this test too much. -echo 4 >/sys/class/firmware/timeout - -# Load this script instead of the desired firmware. -load_fw "$NAME" "$0" -if diff -q "$FW" /dev/test_firmware >/dev/null ; then - echo "$0: firmware was not expected to match" >&2 - exit 1 -else - echo "$0: firmware comparison works" -fi + if diff -q "$FW" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was not expected to match" >&2 + exit 1 + else + echo "$0: timeout works" + fi +} -# Do a proper load, which should work correctly. -load_fw "$NAME" "$FW" -if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then - echo "$0: firmware was not loaded" >&2 - exit 1 -else - echo "$0: fallback mechanism works" -fi +run_sysfs_main_tests() +{ + test_syfs_timeout + # Put timeout high enough for us to do work but not so long that failures + # slow down this test too much. + echo 4 >/sys/class/firmware/timeout -load_fw_cancel "nope-$NAME" "$FW" -if diff -q "$FW" /dev/test_firmware >/dev/null ; then - echo "$0: firmware was expected to be cancelled" >&2 - exit 1 -else - echo "$0: cancelling fallback mechanism works" -fi + # Load this script instead of the desired firmware. + load_fw "$NAME" "$0" + if diff -q "$FW" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was not expected to match" >&2 + exit 1 + else + echo "$0: firmware comparison works" + fi -if load_fw_custom "$NAME" "$FW" ; then + # Do a proper load, which should work correctly. + load_fw "$NAME" "$FW" if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then echo "$0: firmware was not loaded" >&2 exit 1 else - echo "$0: custom fallback loading mechanism works" + echo "$0: fallback mechanism works" fi -fi -if load_fw_custom_cancel "nope-$NAME" "$FW" ; then + load_fw_cancel "nope-$NAME" "$FW" if diff -q "$FW" /dev/test_firmware >/dev/null ; then echo "$0: firmware was expected to be cancelled" >&2 exit 1 else - echo "$0: cancelling custom fallback mechanism works" + echo "$0: cancelling fallback mechanism works" + fi + + set +e + load_fw_fallback_with_child "nope-signal-$NAME" "$FW" + if [ "$?" -eq 0 ]; then + echo "$0: SIGCHLD on sync ignored as expected" >&2 + else + echo "$0: error - sync firmware request cancelled due to SIGCHLD" >&2 + exit 1 + fi + set -e +} + +run_sysfs_custom_load_tests() +{ + RANDOM_FILE_PATH=$(setup_random_file) + RANDOM_FILE="$(basename $RANDOM_FILE_PATH)" + if load_fw_custom "$RANDOM_FILE" "$RANDOM_FILE_PATH" ; then + if ! diff -q "$RANDOM_FILE_PATH" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was not loaded" >&2 + exit 1 + else + echo "$0: custom fallback loading mechanism works" + fi fi -fi -set +e -load_fw_fallback_with_child "nope-signal-$NAME" "$FW" -if [ "$?" -eq 0 ]; then - echo "$0: SIGCHLD on sync ignored as expected" >&2 -else - echo "$0: error - sync firmware request cancelled due to SIGCHLD" >&2 - exit 1 + RANDOM_FILE_PATH=$(setup_random_file) + RANDOM_FILE="$(basename $RANDOM_FILE_PATH)" + if load_fw_custom "$RANDOM_FILE" "$RANDOM_FILE_PATH" ; then + if ! diff -q "$RANDOM_FILE_PATH" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was not loaded" >&2 + exit 1 + else + echo "$0: custom fallback loading mechanism works" + fi + fi + + RANDOM_FILE_REAL="$RANDOM_FILE_PATH" + FAKE_RANDOM_FILE_PATH=$(setup_random_file_fake) + FAKE_RANDOM_FILE="$(basename $FAKE_RANDOM_FILE_PATH)" + + if load_fw_custom_cancel "$FAKE_RANDOM_FILE" "$RANDOM_FILE_REAL" ; then + if diff -q "$RANDOM_FILE_PATH" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was expected to be cancelled" >&2 + exit 1 + else + echo "$0: cancelling custom fallback mechanism works" + fi + fi +} + +if [ "$HAS_FW_LOADER_USER_HELPER_FALLBACK" = "yes" ]; then + run_sysfs_main_tests fi -set -e + +run_sysfs_custom_load_tests exit 0 diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh index b1f20fef36c7..6452d2129cd9 100755 --- a/tools/testing/selftests/firmware/fw_filesystem.sh +++ b/tools/testing/selftests/firmware/fw_filesystem.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # This validates that the kernel will load firmware out of its list of # firmware locations on disk. Since the user helper does similar work, @@ -6,49 +6,15 @@ # know so we can be sure we're not accidentally testing the user helper. set -e -DIR=/sys/devices/virtual/misc/test_firmware +TEST_REQS_FW_SYSFS_FALLBACK="no" +TEST_REQS_FW_SET_CUSTOM_PATH="yes" TEST_DIR=$(dirname $0) +source $TEST_DIR/fw_lib.sh -test_modprobe() -{ - if [ ! -d $DIR ]; then - echo "$0: $DIR not present" - echo "You must have the following enabled in your kernel:" - cat $TEST_DIR/config - exit 1 - fi -} - -trap "test_modprobe" EXIT - -if [ ! -d $DIR ]; then - modprobe test_firmware -fi - -# CONFIG_FW_LOADER_USER_HELPER has a sysfs class under /sys/class/firmware/ -# These days most distros enable CONFIG_FW_LOADER_USER_HELPER but disable -# CONFIG_FW_LOADER_USER_HELPER_FALLBACK. We use /sys/class/firmware/ as an -# indicator for CONFIG_FW_LOADER_USER_HELPER. -HAS_FW_LOADER_USER_HELPER=$(if [ -d /sys/class/firmware/ ]; then echo yes; else echo no; fi) - -if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then - OLD_TIMEOUT=$(cat /sys/class/firmware/timeout) -fi - -OLD_FWPATH=$(cat /sys/module/firmware_class/parameters/path) - -FWPATH=$(mktemp -d) -FW="$FWPATH/test-firmware.bin" - -test_finish() -{ - if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then - echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout - fi - echo -n "$OLD_PATH" >/sys/module/firmware_class/parameters/path - rm -f "$FW" - rmdir "$FWPATH" -} +check_mods +check_setup +verify_reqs +setup_tmp_file trap "test_finish" EXIT @@ -57,14 +23,6 @@ if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then echo 1 >/sys/class/firmware/timeout fi -# Set the kernel search path. -echo -n "$FWPATH" >/sys/module/firmware_class/parameters/path - -# This is an unlikely real-world firmware content. :) -echo "ABCD0123" >"$FW" - -NAME=$(basename "$FW") - if printf '\000' >"$DIR"/trigger_request 2> /dev/null; then echo "$0: empty filename should not succeed" >&2 exit 1 @@ -272,10 +230,13 @@ test_wait_and_cancel_custom_load() test_request_firmware_nowait_custom_nofile() { echo -n "Batched request_firmware_nowait(uevent=false) nofile try #$1: " + config_reset config_unset_uevent - config_set_name nope-test-firmware.bin + RANDOM_FILE_PATH=$(setup_random_file_fake) + RANDOM_FILE="$(basename $RANDOM_FILE_PATH)" + config_set_name $RANDOM_FILE config_trigger_async & - test_wait_and_cancel_custom_load nope-test-firmware.bin + test_wait_and_cancel_custom_load $RANDOM_FILE wait release_all_firmware echo "OK" @@ -313,7 +274,11 @@ test_request_firmware_nowait_uevent() test_request_firmware_nowait_custom() { echo -n "Batched request_firmware_nowait(uevent=false) try #$1: " + config_reset config_unset_uevent + RANDOM_FILE_PATH=$(setup_random_file) + RANDOM_FILE="$(basename $RANDOM_FILE_PATH)" + config_set_name $RANDOM_FILE config_trigger_async release_all_firmware echo "OK" diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh new file mode 100755 index 000000000000..9ea31b57d71a --- /dev/null +++ b/tools/testing/selftests/firmware/fw_lib.sh @@ -0,0 +1,194 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Library of helpers for test scripts. +set -e + +DIR=/sys/devices/virtual/misc/test_firmware + +PROC_CONFIG="/proc/config.gz" +TEST_DIR=$(dirname $0) + +print_reqs_exit() +{ + echo "You must have the following enabled in your kernel:" >&2 + cat $TEST_DIR/config >&2 + exit 1 +} + +test_modprobe() +{ + if [ ! -d $DIR ]; then + print_reqs_exit + fi +} + +check_mods() +{ + trap "test_modprobe" EXIT + if [ ! -d $DIR ]; then + modprobe test_firmware + fi + if [ ! -f $PROC_CONFIG ]; then + if modprobe configs 2>/dev/null; then + echo "Loaded configs module" + if [ ! -f $PROC_CONFIG ]; then + echo "You must have the following enabled in your kernel:" >&2 + cat $TEST_DIR/config >&2 + echo "Resorting to old heuristics" >&2 + fi + else + echo "Failed to load configs module, using old heuristics" >&2 + fi + fi +} + +check_setup() +{ + HAS_FW_LOADER_USER_HELPER="$(kconfig_has CONFIG_FW_LOADER_USER_HELPER=y)" + HAS_FW_LOADER_USER_HELPER_FALLBACK="$(kconfig_has CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y)" + PROC_FW_IGNORE_SYSFS_FALLBACK="0" + PROC_FW_FORCE_SYSFS_FALLBACK="0" + + if [ -z $PROC_SYS_DIR ]; then + PROC_SYS_DIR="/proc/sys/kernel" + fi + + FW_PROC="${PROC_SYS_DIR}/firmware_config" + FW_FORCE_SYSFS_FALLBACK="$FW_PROC/force_sysfs_fallback" + FW_IGNORE_SYSFS_FALLBACK="$FW_PROC/ignore_sysfs_fallback" + + if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then + PROC_FW_FORCE_SYSFS_FALLBACK="$(cat $FW_FORCE_SYSFS_FALLBACK)" + fi + + if [ -f $FW_IGNORE_SYSFS_FALLBACK ]; then + PROC_FW_IGNORE_SYSFS_FALLBACK="$(cat $FW_IGNORE_SYSFS_FALLBACK)" + fi + + if [ "$PROC_FW_FORCE_SYSFS_FALLBACK" = "1" ]; then + HAS_FW_LOADER_USER_HELPER="yes" + HAS_FW_LOADER_USER_HELPER_FALLBACK="yes" + fi + + if [ "$PROC_FW_IGNORE_SYSFS_FALLBACK" = "1" ]; then + HAS_FW_LOADER_USER_HELPER_FALLBACK="no" + HAS_FW_LOADER_USER_HELPER="no" + fi + + if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then + OLD_TIMEOUT="$(cat /sys/class/firmware/timeout)" + fi + + OLD_FWPATH="$(cat /sys/module/firmware_class/parameters/path)" +} + +verify_reqs() +{ + if [ "$TEST_REQS_FW_SYSFS_FALLBACK" = "yes" ]; then + if [ ! "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then + echo "usermode helper disabled so ignoring test" + exit 0 + fi + fi +} + +setup_tmp_file() +{ + FWPATH=$(mktemp -d) + FW="$FWPATH/test-firmware.bin" + echo "ABCD0123" >"$FW" + NAME=$(basename "$FW") + if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then + echo -n "$FWPATH" >/sys/module/firmware_class/parameters/path + fi +} + +__setup_random_file() +{ + RANDOM_FILE_PATH="$(mktemp -p $FWPATH)" + # mktemp says dry-run -n is unsafe, so... + if [[ "$1" = "fake" ]]; then + rm -rf $RANDOM_FILE_PATH + sync + else + echo "ABCD0123" >"$RANDOM_FILE_PATH" + fi + echo $RANDOM_FILE_PATH +} + +setup_random_file() +{ + echo $(__setup_random_file) +} + +setup_random_file_fake() +{ + echo $(__setup_random_file fake) +} + +proc_set_force_sysfs_fallback() +{ + if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then + echo -n $1 > $FW_FORCE_SYSFS_FALLBACK + check_setup + fi +} + +proc_set_ignore_sysfs_fallback() +{ + if [ -f $FW_IGNORE_SYSFS_FALLBACK ]; then + echo -n $1 > $FW_IGNORE_SYSFS_FALLBACK + check_setup + fi +} + +proc_restore_defaults() +{ + proc_set_force_sysfs_fallback 0 + proc_set_ignore_sysfs_fallback 0 +} + +test_finish() +{ + if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then + echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout + fi + if [ "$OLD_FWPATH" = "" ]; then + OLD_FWPATH=" " + fi + if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then + echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path + fi + if [ -f $FW ]; then + rm -f "$FW" + fi + if [ -d $FWPATH ]; then + rm -rf "$FWPATH" + fi + proc_restore_defaults +} + +kconfig_has() +{ + if [ -f $PROC_CONFIG ]; then + if zgrep -q $1 $PROC_CONFIG 2>/dev/null; then + echo "yes" + else + echo "no" + fi + else + # We currently don't have easy heuristics to infer this + # so best we can do is just try to use the kernel assuming + # you had enabled it. This matches the old behaviour. + if [ "$1" = "CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y" ]; then + echo "yes" + elif [ "$1" = "CONFIG_FW_LOADER_USER_HELPER=y" ]; then + if [ -d /sys/class/firmware/ ]; then + echo yes + else + echo no + fi + fi + fi +} diff --git a/tools/testing/selftests/firmware/fw_run_tests.sh b/tools/testing/selftests/firmware/fw_run_tests.sh new file mode 100755 index 000000000000..06d638e9dc62 --- /dev/null +++ b/tools/testing/selftests/firmware/fw_run_tests.sh @@ -0,0 +1,70 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This runs all known tests across all known possible configurations we could +# emulate in one run. + +set -e + +TEST_DIR=$(dirname $0) +source $TEST_DIR/fw_lib.sh + +export HAS_FW_LOADER_USER_HELPER="" +export HAS_FW_LOADER_USER_HELPER_FALLBACK="" + +run_tests() +{ + proc_set_force_sysfs_fallback $1 + proc_set_ignore_sysfs_fallback $2 + $TEST_DIR/fw_filesystem.sh + + proc_set_force_sysfs_fallback $1 + proc_set_ignore_sysfs_fallback $2 + $TEST_DIR/fw_fallback.sh +} + +run_test_config_0001() +{ + echo "-----------------------------------------------------" + echo "Running kernel configuration test 1 -- rare" + echo "Emulates:" + echo "CONFIG_FW_LOADER=y" + echo "CONFIG_FW_LOADER_USER_HELPER=n" + echo "CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n" + run_tests 0 1 +} + +run_test_config_0002() +{ + echo "-----------------------------------------------------" + echo "Running kernel configuration test 2 -- distro" + echo "Emulates:" + echo "CONFIG_FW_LOADER=y" + echo "CONFIG_FW_LOADER_USER_HELPER=y" + echo "CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n" + proc_set_ignore_sysfs_fallback 0 + run_tests 0 0 +} + +run_test_config_0003() +{ + echo "-----------------------------------------------------" + echo "Running kernel configuration test 3 -- android" + echo "Emulates:" + echo "CONFIG_FW_LOADER=y" + echo "CONFIG_FW_LOADER_USER_HELPER=y" + echo "CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y" + run_tests 1 0 +} + +check_mods +check_setup + +if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then + run_test_config_0001 + run_test_config_0002 + run_test_config_0003 +else + echo "Running basic kernel configuration, working with your config" + run_test +fi diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc index 589d52b211b7..27a54a17da65 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc @@ -29,6 +29,12 @@ ftrace_filter_check '*schedule*' '^.*schedule.*$' # filter by *, end match ftrace_filter_check 'schedule*' '^schedule.*$' +# filter by *mid*end +ftrace_filter_check '*aw*lock' '.*aw.*lock$' + +# filter by start*mid* +ftrace_filter_check 'mutex*try*' '^mutex.*try.*' + # Advanced full-glob matching feature is recently supported. # Skip the tests if we are sure the kernel does not support it. if grep -q 'accepts: .* glob-matching-pattern' README ; then diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc index 0f3f92622e33..68e7a48f5828 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc @@ -128,6 +128,43 @@ if check_set_ftrace_filter "$FUNC1" "$FUNC2" ; then fail "Expected $FUNC1 and $FUNC2" fi +test_actual() { # Compares $TMPDIR/expected with set_ftrace_filter + cat set_ftrace_filter | grep -v '#' | cut -d' ' -f1 | cut -d':' -f1 | sort -u > $TMPDIR/actual + DIFF=`diff $TMPDIR/actual $TMPDIR/expected` + test -z "$DIFF" +} + +# Set traceoff trigger for all fuctions with "lock" in their name +cat available_filter_functions | cut -d' ' -f1 | grep 'lock' | sort -u > $TMPDIR/expected +echo '*lock*:traceoff' > set_ftrace_filter +test_actual + +# now remove all with 'try' in it, and end with lock +grep -v 'try.*lock$' $TMPDIR/expected > $TMPDIR/expected2 +mv $TMPDIR/expected2 $TMPDIR/expected +echo '!*try*lock:traceoff' >> set_ftrace_filter +test_actual + +# remove all that start with "m" and end with "lock" +grep -v '^m.*lock$' $TMPDIR/expected > $TMPDIR/expected2 +mv $TMPDIR/expected2 $TMPDIR/expected +echo '!m*lock:traceoff' >> set_ftrace_filter +test_actual + +# remove all that start with "c" and have "unlock" +grep -v '^c.*unlock' $TMPDIR/expected > $TMPDIR/expected2 +mv $TMPDIR/expected2 $TMPDIR/expected +echo '!c*unlock*:traceoff' >> set_ftrace_filter +test_actual + +# clear all the rest +> $TMPDIR/expected +echo '!*:traceoff' >> set_ftrace_filter +test_actual + +rm $TMPDIR/expected +rm $TMPDIR/actual + do_reset exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index f2019b37370d..2a4f16fc9819 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -37,17 +37,21 @@ reset_ftrace_filter() { # reset all triggers in set_ftrace_filter if [ "$tr" = "" ]; then continue fi + if ! grep -q "$t" set_ftrace_filter; then + continue; + fi + name=`echo $t | cut -d: -f1 | cut -d' ' -f1` if [ $tr = "enable_event" -o $tr = "disable_event" ]; then - tr=`echo $t | cut -d: -f1-4` + tr=`echo $t | cut -d: -f2-4` limit=`echo $t | cut -d: -f5` else - tr=`echo $t | cut -d: -f1-2` + tr=`echo $t | cut -d: -f2` limit=`echo $t | cut -d: -f3` fi if [ "$limit" != "unlimited" ]; then tr="$tr:$limit" fi - echo "!$tr" > set_ftrace_filter + echo "!$name:$tr" > set_ftrace_filter done } @@ -55,6 +59,13 @@ disable_events() { echo 0 > events/enable } +clear_synthetic_events() { # reset all current synthetic events + grep -v ^# synthetic_events | + while read line; do + echo "!$line" >> synthetic_events + done +} + initialize_ftrace() { # Reset ftrace to initial-state # As the initial state, ftrace will be set to nop tracer, # no events, no triggers, no filters, no function filters, diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc new file mode 100644 index 000000000000..5ba73035e1d9 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -0,0 +1,46 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kprobe event string type argument + +[ -f kprobe_events ] || exit_unsupported # this is configurable + +echo 0 > events/enable +echo > kprobe_events + +case `uname -m` in +x86_64) + ARG2=%si + OFFS=8 +;; +i[3456]86) + ARG2=%cx + OFFS=4 +;; +aarch64) + ARG2=%x1 + OFFS=8 +;; +arm*) + ARG2=%r1 + OFFS=4 +;; +*) + echo "Please implement other architecture here" + exit_untested +esac + +: "Test get argument (1)" +echo "p:testprobe create_trace_kprobe arg1=+0(+0(${ARG2})):string" > kprobe_events +echo 1 > events/kprobes/testprobe/enable +! echo test >> kprobe_events +tail -n 1 trace | grep -qe "testprobe.* arg1=\"test\"" + +echo 0 > events/kprobes/testprobe/enable +: "Test get argument (2)" +echo "p:testprobe create_trace_kprobe arg1=+0(+0(${ARG2})):string arg2=+0(+${OFFS}(${ARG2})):string" > kprobe_events +echo 1 > events/kprobes/testprobe/enable +! echo test1 test2 >> kprobe_events +tail -n 1 trace | grep -qe "testprobe.* arg1=\"test1\" arg2=\"test2\"" + +echo 0 > events/enable +echo > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc new file mode 100644 index 000000000000..231bcd2c4eb5 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc @@ -0,0 +1,97 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kprobe event argument syntax + +[ -f kprobe_events ] || exit_unsupported # this is configurable + +grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue + +echo 0 > events/enable +echo > kprobe_events + +PROBEFUNC="vfs_read" +GOODREG= +BADREG= +GOODSYM="_sdata" +if ! grep -qw ${GOODSYM} /proc/kallsyms ; then + GOODSYM=$PROBEFUNC +fi +BADSYM="deaqswdefr" +SYMADDR=0x`grep -w ${GOODSYM} /proc/kallsyms | cut -f 1 -d " "` +GOODTYPE="x16" +BADTYPE="y16" + +case `uname -m` in +x86_64|i[3456]86) + GOODREG=%ax + BADREG=%ex +;; +aarch64) + GOODREG=%x0 + BADREG=%ax +;; +arm*) + GOODREG=%r0 + BADREG=%ax +;; +esac + +test_goodarg() # Good-args +{ + while [ "$1" ]; do + echo "p ${PROBEFUNC} $1" > kprobe_events + shift 1 + done; +} + +test_badarg() # Bad-args +{ + while [ "$1" ]; do + ! echo "p ${PROBEFUNC} $1" > kprobe_events + shift 1 + done; +} + +echo > kprobe_events + +: "Register access" +test_goodarg ${GOODREG} +test_badarg ${BADREG} + +: "Symbol access" +test_goodarg "@${GOODSYM}" "@${SYMADDR}" "@${GOODSYM}+10" "@${GOODSYM}-10" +test_badarg "@" "@${BADSYM}" "@${GOODSYM}*10" "@${GOODSYM}/10" \ + "@${GOODSYM}%10" "@${GOODSYM}&10" "@${GOODSYM}|10" + +: "Stack access" +test_goodarg "\$stack" "\$stack0" "\$stack1" +test_badarg "\$stackp" "\$stack0+10" "\$stack1-10" + +: "Retval access" +echo "r ${PROBEFUNC} \$retval" > kprobe_events +! echo "p ${PROBEFUNC} \$retval" > kprobe_events + +: "Comm access" +test_goodarg "\$comm" + +: "Indirect memory access" +test_goodarg "+0(${GOODREG})" "-0(${GOODREG})" "+10(\$stack)" \ + "+0(\$stack1)" "+10(@${GOODSYM}-10)" "+0(+10(+20(\$stack)))" +test_badarg "+(${GOODREG})" "(${GOODREG}+10)" "-(${GOODREG})" "(${GOODREG})" \ + "+10(\$comm)" "+0(${GOODREG})+10" + +: "Name assignment" +test_goodarg "varname=${GOODREG}" +test_badarg "varname=varname2=${GOODREG}" + +: "Type syntax" +test_goodarg "${GOODREG}:${GOODTYPE}" +test_badarg "${GOODREG}::${GOODTYPE}" "${GOODREG}:${BADTYPE}" \ + "${GOODTYPE}:${GOODREG}" + +: "Combination check" + +test_goodarg "\$comm:string" "+0(\$stack):string" +test_badarg "\$comm:x64" "\$stack:string" "${GOODREG}:string" + +echo > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc index bb16cf91f1b5..ce361b9d62cf 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc @@ -12,9 +12,24 @@ case `uname -m` in *) OFFS=0;; esac -echo "Setup up to 256 kprobes" -grep t /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \ -head -n 256 | while read i; do echo p ${i}+${OFFS} ; done > kprobe_events ||: +if [ -d events/kprobes ]; then + echo 0 > events/kprobes/enable + echo > kprobe_events +fi + +N=0 +echo "Setup up kprobes on first available 256 text symbols" +grep -i " t " /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \ +while read i; do + echo p ${i}+${OFFS} >> kprobe_events && N=$((N+1)) ||: + test $N -eq 256 && break +done + +L=`wc -l kprobe_events` +if [ $L -ne $N ]; then + echo "The number of kprobes events ($L) is not $N" + exit_fail +fi echo 1 > events/kprobes/enable echo 0 > events/kprobes/enable diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc new file mode 100644 index 000000000000..4fda01a08da4 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc @@ -0,0 +1,43 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kprobe events - probe points + +[ -f kprobe_events ] || exit_unsupported # this is configurable + +TARGET_FUNC=create_trace_kprobe + +dec_addr() { # hexaddr + printf "%d" "0x"`echo $1 | tail -c 8` +} + +set_offs() { # prev target next + A1=`dec_addr $1` + A2=`dec_addr $2` + A3=`dec_addr $3` + TARGET="0x$2" # an address + PREV=`expr $A1 - $A2` # offset to previous symbol + NEXT=+`expr $A3 - $A2` # offset to next symbol + OVERFLOW=+`printf "0x%x" ${PREV}` # overflow offset to previous symbol +} + +# We have to decode symbol addresses to get correct offsets. +# If the offset is not an instruction boundary, it cause -EILSEQ. +set_offs `grep -A1 -B1 ${TARGET_FUNC} /proc/kallsyms | cut -f 1 -d " " | xargs` + +UINT_TEST=no +# printf "%x" -1 returns (unsigned long)-1. +if [ `printf "%x" -1 | wc -c` != 9 ]; then + UINT_TEST=yes +fi + +echo 0 > events/enable +echo > kprobe_events +echo "p:testprobe ${TARGET_FUNC}" > kprobe_events +echo "p:testprobe ${TARGET}" > kprobe_events +echo "p:testprobe ${TARGET_FUNC}${NEXT}" > kprobe_events +! echo "p:testprobe ${TARGET_FUNC}${PREV}" > kprobe_events +if [ "${UINT_TEST}" = yes ]; then +! echo "p:testprobe ${TARGET_FUNC}${OVERFLOW}" > kprobe_events +fi +echo > kprobe_events +clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc new file mode 100644 index 000000000000..786dce7e48be --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc @@ -0,0 +1,39 @@ +#!/bin/sh +# description: event trigger - test extended error support + + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +echo "Test extended error support" +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger &>/dev/null +if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then + fail "Failed to generate extended error in histogram" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc new file mode 100644 index 000000000000..7fd5b4a8f060 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc @@ -0,0 +1,54 @@ +#!/bin/sh +# description: event trigger - test field variable support + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test field variable support" + +echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events +echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger +echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger +echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger + +ping localhost -c 3 +if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then + fail "Failed to create inter-event histogram" +fi + +if ! grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then + fail "Failed to create histogram with field variable" +fi + +echo '!hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger + +if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then + fail "Failed to remove histogram with field variable" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc new file mode 100644 index 000000000000..c93dbe38b5df --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc @@ -0,0 +1,58 @@ +#!/bin/sh +# description: event trigger - test inter-event combined histogram trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +reset_tracer +do_reset +clear_synthetic_events + +echo "Test create synthetic event" + +echo 'waking_latency u64 lat pid_t pid' > synthetic_events +if [ ! -d events/synthetic/waking_latency ]; then + fail "Failed to create waking_latency synthetic event" +fi + +echo "Test combined histogram" + +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger +echo 'hist:keys=pid:waking_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).waking_latency($waking_lat,pid) if comm=="ping"' > events/sched/sched_wakeup/trigger +echo 'hist:keys=pid,lat:sort=pid,lat' > events/synthetic/waking_latency/trigger + +echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events +echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger + +echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events +echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger +echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger + +ping localhost -c 3 +if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then + fail "Failed to create combined histogram" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc new file mode 100644 index 000000000000..e84e7d048566 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc @@ -0,0 +1,50 @@ +#!/bin/sh +# description: event trigger - test inter-event histogram trigger onmatch action + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test create synthetic event" + +echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events +if [ ! -d events/synthetic/wakeup_latency ]; then + fail "Failed to create wakeup_latency synthetic event" +fi + +echo "Test create histogram for synthetic event" +echo "Test histogram variables,simple expression support and onmatch action" + +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger +echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger +ping localhost -c 5 +if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then + fail "Failed to create onmatch action inter-event histogram" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc new file mode 100644 index 000000000000..7907d8aacde3 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc @@ -0,0 +1,50 @@ +#!/bin/sh +# description: event trigger - test inter-event histogram trigger onmatch-onmax action + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test create synthetic event" + +echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events +if [ ! -d events/synthetic/wakeup_latency ]; then + fail "Failed to create wakeup_latency synthetic event" +fi + +echo "Test create histogram for synthetic event" +echo "Test histogram variables,simple expression support and onmatch-onmax action" + +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger +echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger +ping localhost -c 5 +if [ ! grep -q "ping" events/synthetic/wakeup_latency/hist -o ! grep -q "max:" events/sched/sched_switch/hist]; then + fail "Failed to create onmatch-onmax action inter-event histogram" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc new file mode 100644 index 000000000000..38b7ed6242b2 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc @@ -0,0 +1,48 @@ +#!/bin/sh +# description: event trigger - test inter-event histogram trigger onmax action + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test create synthetic event" + +echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events +if [ ! -d events/synthetic/wakeup_latency ]; then + fail "Failed to create wakeup_latency synthetic event" +fi + +echo "Test onmax action" + +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger +ping localhost -c 3 +if ! grep -q "max:" events/sched/sched_switch/hist; then + fail "Failed to create onmax action inter-event histogram" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc new file mode 100644 index 000000000000..cef11377dcbd --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc @@ -0,0 +1,54 @@ +#!/bin/sh +# description: event trigger - test synthetic event create remove +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test create synthetic event" + +echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events +if [ ! -d events/synthetic/wakeup_latency ]; then + fail "Failed to create wakeup_latency synthetic event" +fi + +reset_trigger + +echo "Test create synthetic event with an error" +echo 'wakeup_latency u64 lat pid_t pid char' > synthetic_events > /dev/null +if [ -d events/synthetic/wakeup_latency ]; then + fail "Created wakeup_latency synthetic event with an invalid format" +fi + +reset_trigger + +echo "Test remove synthetic event" +echo '!wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events +if [ -d events/synthetic/wakeup_latency ]; then + fail "Failed to delete wakeup_latency synthetic event" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile index cea4adcd42b8..8497a376ef9d 100644 --- a/tools/testing/selftests/futex/Makefile +++ b/tools/testing/selftests/futex/Makefile @@ -12,12 +12,16 @@ all: BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ - if [ -e $$DIR/$(TEST_PROGS) ]; then - rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; - fi + if [ -e $$DIR/$(TEST_PROGS) ]; then \ + rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \ + fi \ done override define RUN_TESTS + @export KSFT_TAP_LEVEL=`echo 1`; + @echo "TAP version 13"; + @echo "selftests: futex"; + @echo "========================================"; @cd $(OUTPUT); ./run.sh endef diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh index 17d5bd0c0936..a27e2eec3586 100755 --- a/tools/testing/selftests/gen_kselftest_tar.sh +++ b/tools/testing/selftests/gen_kselftest_tar.sh @@ -1,13 +1,11 @@ #!/bin/bash # +# SPDX-License-Identifier: GPL-2.0 # gen_kselftest_tar # Generate kselftest tarball # Author: Shuah Khan <shuahkh@osg.samsung.com> # Copyright (C) 2015 Samsung Electronics Co., Ltd. -# This software may be freely redistributed under the terms of the GNU -# General Public License (GPLv2). - # main main() { diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile index 5a3f7d37e912..7340fd6a9a9f 100644 --- a/tools/testing/selftests/intel_pstate/Makefile +++ b/tools/testing/selftests/intel_pstate/Makefile @@ -2,7 +2,10 @@ CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE LDLIBS := $(LDLIBS) -lm -ifeq (,$(filter $(ARCH),x86)) +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) + +ifeq (x86,$(ARCH)) TEST_GEN_FILES := msr aperf endif diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 1ae565ed9bf0..1b9d8ecdebce 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * kselftest.h: kselftest framework return codes to include from * selftests. @@ -5,7 +6,6 @@ * Copyright (c) 2014 Shuah Khan <shuahkh@osg.samsung.com> * Copyright (c) 2014 Samsung Electronics Co., Ltd. * - * This file is released under the GPLv2. */ #ifndef __KSELFTEST_H #define __KSELFTEST_H @@ -57,7 +57,8 @@ static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; } static inline void ksft_print_header(void) { - printf("TAP version 13\n"); + if (!(getenv("KSFT_TAP_LEVEL"))) + printf("TAP version 13\n"); } static inline void ksft_print_cnts(void) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index e81bd28bdd89..6ae3730c4ee3 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -107,6 +107,27 @@ __FILE__, __LINE__, _metadata->name, ##__VA_ARGS__) /** + * XFAIL(statement, fmt, ...) + * + * @statement: statement to run after reporting XFAIL + * @fmt: format string + * @...: optional arguments + * + * This forces a "pass" after reporting a failure with an XFAIL prefix, + * and runs "statement", which is usually "return" or "goto skip". + */ +#define XFAIL(statement, fmt, ...) do { \ + if (TH_LOG_ENABLED) { \ + fprintf(TH_LOG_STREAM, "[ XFAIL! ] " fmt "\n", \ + ##__VA_ARGS__); \ + } \ + /* TODO: find a way to pass xfail to test runner process. */ \ + _metadata->passed = 1; \ + _metadata->trigger = 0; \ + statement; \ +} while (0) + +/** * TEST(test_name) - Defines the test function and creates the registration * stub * @@ -198,7 +219,7 @@ /** * FIXTURE_SETUP(fixture_name) - Prepares the setup function for the fixture. - * *_metadata* is included so that ASSERT_* work as a convenience + * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly. * * @fixture_name: fixture name * @@ -221,6 +242,7 @@ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) /** * FIXTURE_TEARDOWN(fixture_name) + * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly. * * @fixture_name: fixture name * @@ -253,6 +275,8 @@ * Defines a test that depends on a fixture (e.g., is part of a test case). * Very similar to TEST() except that *self* is the setup instance of fixture's * datatype exposed for use by the implementation. + * + * Warning: use of ASSERT_* here will skip TEARDOWN. */ /* TODO(wad) register fixtures on dedicated test lists. */ #define TEST_F(fixture_name, test_name) \ diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh index 1555fbdb08da..ec304463883c 100755 --- a/tools/testing/selftests/kselftest_install.sh +++ b/tools/testing/selftests/kselftest_install.sh @@ -1,13 +1,11 @@ #!/bin/bash +# SPDX-License-Identifier: GPL-2.0 # # Kselftest Install # Install kselftest tests # Author: Shuah Khan <shuahkh@osg.samsung.com> # Copyright (C) 2015 Samsung Electronics Co., Ltd. -# This software may be freely redistributed under the terms of the GNU -# General Public License (GPLv2). - install_loc=`pwd` main() diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile new file mode 100644 index 000000000000..dc44de904797 --- /dev/null +++ b/tools/testing/selftests/kvm/Makefile @@ -0,0 +1,39 @@ +all: + +top_srcdir = ../../../../ +UNAME_M := $(shell uname -m) + +LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c +LIBKVM_x86_64 = lib/x86.c + +TEST_GEN_PROGS_x86_64 = set_sregs_test +TEST_GEN_PROGS_x86_64 += sync_regs_test + +TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) +LIBKVM += $(LIBKVM_$(UNAME_M)) + +INSTALL_HDR_PATH = $(top_srcdir)/usr +LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ +CFLAGS += -O2 -g -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) + +# After inclusion, $(OUTPUT) is defined and +# $(TEST_GEN_PROGS) starts with $(OUTPUT)/ +include ../lib.mk + +STATIC_LIBS := $(OUTPUT)/libkvm.a +LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM)) +EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) + +x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ)))) +$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ) + $(AR) crs $@ $^ + +$(LINUX_HDR_PATH): + make -C $(top_srcdir) headers_install + +all: $(STATIC_LIBS) $(LINUX_HDR_PATH) +$(TEST_GEN_PROGS): $(STATIC_LIBS) +$(TEST_GEN_PROGS) $(LIBKVM_OBJ): | $(LINUX_HDR_PATH) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h new file mode 100644 index 000000000000..57974ad46373 --- /dev/null +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -0,0 +1,142 @@ +/* + * tools/testing/selftests/kvm/include/kvm_util.h + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + */ +#ifndef SELFTEST_KVM_UTIL_H +#define SELFTEST_KVM_UTIL_H 1 + +#include "test_util.h" + +#include "asm/kvm.h" +#include "linux/kvm.h" +#include <sys/ioctl.h> + +#include "sparsebit.h" + +/* + * Memslots can't cover the gfn starting at this gpa otherwise vCPUs can't be + * created. Only applies to VMs using EPT. + */ +#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul + + +/* Callers of kvm_util only have an incomplete/opaque description of the + * structure kvm_util is using to maintain the state of a VM. + */ +struct kvm_vm; + +typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ +typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ + +/* Minimum allocated guest virtual and physical addresses */ +#define KVM_UTIL_MIN_VADDR 0x2000 + +#define DEFAULT_GUEST_PHY_PAGES 512 +#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000 +#define DEFAULT_STACK_PGS 5 + +enum vm_guest_mode { + VM_MODE_FLAT48PG, +}; + +enum vm_mem_backing_src_type { + VM_MEM_SRC_ANONYMOUS, + VM_MEM_SRC_ANONYMOUS_THP, + VM_MEM_SRC_ANONYMOUS_HUGETLB, +}; + +int kvm_check_cap(long cap); + +struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); +void kvm_vm_free(struct kvm_vm *vmp); + +int kvm_memcmp_hva_gva(void *hva, + struct kvm_vm *vm, const vm_vaddr_t gva, size_t len); + +void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename, + uint32_t data_memslot, uint32_t pgd_memslot); + +void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); +void vcpu_dump(FILE *stream, struct kvm_vm *vm, + uint32_t vcpuid, uint8_t indent); + +void vm_create_irqchip(struct kvm_vm *vm); + +void vm_userspace_mem_region_add(struct kvm_vm *vm, + enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags); + +void vcpu_ioctl(struct kvm_vm *vm, + uint32_t vcpuid, unsigned long ioctl, void *arg); +void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); +void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); +void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid); +vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, + uint32_t data_memslot, uint32_t pgd_memslot); +void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); +void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); +vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); +vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); + +struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); +int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_mp_state *mp_state); +void vcpu_regs_get(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_regs *regs); +void vcpu_regs_set(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_regs *regs); +void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...); +void vcpu_sregs_get(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_sregs *sregs); +void vcpu_sregs_set(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_sregs *sregs); +int _vcpu_sregs_set(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_sregs *sregs); +void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_vcpu_events *events); +void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_vcpu_events *events); + +const char *exit_reason_str(unsigned int exit_reason); + +void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot); +void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + uint32_t pgd_memslot); +vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, + vm_paddr_t paddr_min, uint32_t memslot); + +void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid); +void vcpu_set_cpuid( + struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); + +struct kvm_cpuid2 *allocate_kvm_cpuid2(void); +struct kvm_cpuid_entry2 * +find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, + uint32_t index); + +static inline struct kvm_cpuid_entry2 * +find_cpuid_entry(struct kvm_cpuid2 *cpuid, uint32_t function) +{ + return find_cpuid_index_entry(cpuid, function, 0); +} + +struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code); +void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); + +struct kvm_userspace_memory_region * +kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, + uint64_t end); + +struct kvm_dirty_log * +allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region); + +int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); + +#endif /* SELFTEST_KVM_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h new file mode 100644 index 000000000000..54cfeb6568d3 --- /dev/null +++ b/tools/testing/selftests/kvm/include/sparsebit.h @@ -0,0 +1,75 @@ +/* + * tools/testing/selftests/kvm/include/sparsebit.h + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * + * Header file that describes API to the sparsebit library. + * This library provides a memory efficient means of storing + * the settings of bits indexed via a uint64_t. Memory usage + * is reasonable, significantly less than (2^64 / 8) bytes, as + * long as bits that are mostly set or mostly cleared are close + * to each other. This library is efficient in memory usage + * even in the case where most bits are set. + */ + +#ifndef _TEST_SPARSEBIT_H_ +#define _TEST_SPARSEBIT_H_ + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct sparsebit; +typedef uint64_t sparsebit_idx_t; +typedef uint64_t sparsebit_num_t; + +struct sparsebit *sparsebit_alloc(void); +void sparsebit_free(struct sparsebit **sbitp); +void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src); + +bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx); +bool sparsebit_is_set_num(struct sparsebit *sbit, + sparsebit_idx_t idx, sparsebit_num_t num); +bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx); +bool sparsebit_is_clear_num(struct sparsebit *sbit, + sparsebit_idx_t idx, sparsebit_num_t num); +sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit); +bool sparsebit_any_set(struct sparsebit *sbit); +bool sparsebit_any_clear(struct sparsebit *sbit); +bool sparsebit_all_set(struct sparsebit *sbit); +bool sparsebit_all_clear(struct sparsebit *sbit); +sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit); +sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit); +sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev); +sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev); +sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit, + sparsebit_idx_t start, sparsebit_num_t num); +sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit, + sparsebit_idx_t start, sparsebit_num_t num); + +void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx); +void sparsebit_set_num(struct sparsebit *sbitp, sparsebit_idx_t start, + sparsebit_num_t num); +void sparsebit_set_all(struct sparsebit *sbitp); + +void sparsebit_clear(struct sparsebit *sbitp, sparsebit_idx_t idx); +void sparsebit_clear_num(struct sparsebit *sbitp, + sparsebit_idx_t start, sparsebit_num_t num); +void sparsebit_clear_all(struct sparsebit *sbitp); + +void sparsebit_dump(FILE *stream, struct sparsebit *sbit, + unsigned int indent); +void sparsebit_validate_internal(struct sparsebit *sbit); + +#ifdef __cplusplus +} +#endif + +#endif /* _TEST_SPARSEBIT_H_ */ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h new file mode 100644 index 000000000000..7ab98e41324f --- /dev/null +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -0,0 +1,45 @@ +/* + * tools/testing/selftests/kvm/include/test_util.h + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + */ + +#ifndef TEST_UTIL_H +#define TEST_UTIL_H 1 + +#include <stdlib.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <errno.h> +#include <unistd.h> +#include <fcntl.h> + +ssize_t test_write(int fd, const void *buf, size_t count); +ssize_t test_read(int fd, void *buf, size_t count); +int test_seq_read(const char *path, char **bufp, size_t *sizep); + +void test_assert(bool exp, const char *exp_str, + const char *file, unsigned int line, const char *fmt, ...); + +#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0])) + +#define TEST_ASSERT(e, fmt, ...) \ + test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__) + +#define ASSERT_EQ(a, b) do { \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + TEST_ASSERT(__a == __b, \ + "ASSERT_EQ(%s, %s) failed.\n" \ + "\t%s is %#lx\n" \ + "\t%s is %#lx", \ + #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \ +} while (0) + +#endif /* TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86.h new file mode 100644 index 000000000000..4a5b2c4c1a0f --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86.h @@ -0,0 +1,1043 @@ +/* + * tools/testing/selftests/kvm/include/x86.h + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + */ + +#ifndef SELFTEST_KVM_X86_H +#define SELFTEST_KVM_X86_H + +#include <assert.h> +#include <stdint.h> + +#define X86_EFLAGS_FIXED (1u << 1) + +#define X86_CR4_VME (1ul << 0) +#define X86_CR4_PVI (1ul << 1) +#define X86_CR4_TSD (1ul << 2) +#define X86_CR4_DE (1ul << 3) +#define X86_CR4_PSE (1ul << 4) +#define X86_CR4_PAE (1ul << 5) +#define X86_CR4_MCE (1ul << 6) +#define X86_CR4_PGE (1ul << 7) +#define X86_CR4_PCE (1ul << 8) +#define X86_CR4_OSFXSR (1ul << 9) +#define X86_CR4_OSXMMEXCPT (1ul << 10) +#define X86_CR4_UMIP (1ul << 11) +#define X86_CR4_VMXE (1ul << 13) +#define X86_CR4_SMXE (1ul << 14) +#define X86_CR4_FSGSBASE (1ul << 16) +#define X86_CR4_PCIDE (1ul << 17) +#define X86_CR4_OSXSAVE (1ul << 18) +#define X86_CR4_SMEP (1ul << 20) +#define X86_CR4_SMAP (1ul << 21) +#define X86_CR4_PKE (1ul << 22) + +/* The enum values match the intruction encoding of each register */ +enum x86_register { + RAX = 0, + RCX, + RDX, + RBX, + RSP, + RBP, + RSI, + RDI, + R8, + R9, + R10, + R11, + R12, + R13, + R14, + R15, +}; + +struct desc64 { + uint16_t limit0; + uint16_t base0; + unsigned base1:8, type:5, dpl:2, p:1; + unsigned limit1:4, zero0:3, g:1, base2:8; + uint32_t base3; + uint32_t zero1; +} __attribute__((packed)); + +struct desc_ptr { + uint16_t size; + uint64_t address; +} __attribute__((packed)); + +static inline uint64_t get_desc64_base(const struct desc64 *desc) +{ + return ((uint64_t)desc->base3 << 32) | + (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); +} + +static inline uint64_t rdtsc(void) +{ + uint32_t eax, edx; + + /* + * The lfence is to wait (on Intel CPUs) until all previous + * instructions have been executed. + */ + __asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx)); + return ((uint64_t)edx) << 32 | eax; +} + +static inline uint64_t rdtscp(uint32_t *aux) +{ + uint32_t eax, edx; + + __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux)); + return ((uint64_t)edx) << 32 | eax; +} + +static inline uint64_t rdmsr(uint32_t msr) +{ + uint32_t a, d; + + __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory"); + + return a | ((uint64_t) d << 32); +} + +static inline void wrmsr(uint32_t msr, uint64_t value) +{ + uint32_t a = value; + uint32_t d = value >> 32; + + __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory"); +} + + +static inline uint16_t inw(uint16_t port) +{ + uint16_t tmp; + + __asm__ __volatile__("in %%dx, %%ax" + : /* output */ "=a" (tmp) + : /* input */ "d" (port)); + + return tmp; +} + +static inline uint16_t get_es(void) +{ + uint16_t es; + + __asm__ __volatile__("mov %%es, %[es]" + : /* output */ [es]"=rm"(es)); + return es; +} + +static inline uint16_t get_cs(void) +{ + uint16_t cs; + + __asm__ __volatile__("mov %%cs, %[cs]" + : /* output */ [cs]"=rm"(cs)); + return cs; +} + +static inline uint16_t get_ss(void) +{ + uint16_t ss; + + __asm__ __volatile__("mov %%ss, %[ss]" + : /* output */ [ss]"=rm"(ss)); + return ss; +} + +static inline uint16_t get_ds(void) +{ + uint16_t ds; + + __asm__ __volatile__("mov %%ds, %[ds]" + : /* output */ [ds]"=rm"(ds)); + return ds; +} + +static inline uint16_t get_fs(void) +{ + uint16_t fs; + + __asm__ __volatile__("mov %%fs, %[fs]" + : /* output */ [fs]"=rm"(fs)); + return fs; +} + +static inline uint16_t get_gs(void) +{ + uint16_t gs; + + __asm__ __volatile__("mov %%gs, %[gs]" + : /* output */ [gs]"=rm"(gs)); + return gs; +} + +static inline uint16_t get_tr(void) +{ + uint16_t tr; + + __asm__ __volatile__("str %[tr]" + : /* output */ [tr]"=rm"(tr)); + return tr; +} + +static inline uint64_t get_cr0(void) +{ + uint64_t cr0; + + __asm__ __volatile__("mov %%cr0, %[cr0]" + : /* output */ [cr0]"=r"(cr0)); + return cr0; +} + +static inline uint64_t get_cr3(void) +{ + uint64_t cr3; + + __asm__ __volatile__("mov %%cr3, %[cr3]" + : /* output */ [cr3]"=r"(cr3)); + return cr3; +} + +static inline uint64_t get_cr4(void) +{ + uint64_t cr4; + + __asm__ __volatile__("mov %%cr4, %[cr4]" + : /* output */ [cr4]"=r"(cr4)); + return cr4; +} + +static inline void set_cr4(uint64_t val) +{ + __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); +} + +static inline uint64_t get_gdt_base(void) +{ + struct desc_ptr gdt; + __asm__ __volatile__("sgdt %[gdt]" + : /* output */ [gdt]"=m"(gdt)); + return gdt.address; +} + +static inline uint64_t get_idt_base(void) +{ + struct desc_ptr idt; + __asm__ __volatile__("sidt %[idt]" + : /* output */ [idt]"=m"(idt)); + return idt.address; +} + +#define SET_XMM(__var, __xmm) \ + asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) + +static inline void set_xmm(int n, unsigned long val) +{ + switch (n) { + case 0: + SET_XMM(val, xmm0); + break; + case 1: + SET_XMM(val, xmm1); + break; + case 2: + SET_XMM(val, xmm2); + break; + case 3: + SET_XMM(val, xmm3); + break; + case 4: + SET_XMM(val, xmm4); + break; + case 5: + SET_XMM(val, xmm5); + break; + case 6: + SET_XMM(val, xmm6); + break; + case 7: + SET_XMM(val, xmm7); + break; + } +} + +typedef unsigned long v1di __attribute__ ((vector_size (8))); +static inline unsigned long get_xmm(int n) +{ + assert(n >= 0 && n <= 7); + + register v1di xmm0 __asm__("%xmm0"); + register v1di xmm1 __asm__("%xmm1"); + register v1di xmm2 __asm__("%xmm2"); + register v1di xmm3 __asm__("%xmm3"); + register v1di xmm4 __asm__("%xmm4"); + register v1di xmm5 __asm__("%xmm5"); + register v1di xmm6 __asm__("%xmm6"); + register v1di xmm7 __asm__("%xmm7"); + switch (n) { + case 0: + return (unsigned long)xmm0; + case 1: + return (unsigned long)xmm1; + case 2: + return (unsigned long)xmm2; + case 3: + return (unsigned long)xmm3; + case 4: + return (unsigned long)xmm4; + case 5: + return (unsigned long)xmm5; + case 6: + return (unsigned long)xmm6; + case 7: + return (unsigned long)xmm7; + } + return 0; +} + +/* + * Basic CPU control in CR0 + */ +#define X86_CR0_PE (1UL<<0) /* Protection Enable */ +#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */ +#define X86_CR0_EM (1UL<<2) /* Emulation */ +#define X86_CR0_TS (1UL<<3) /* Task Switched */ +#define X86_CR0_ET (1UL<<4) /* Extension Type */ +#define X86_CR0_NE (1UL<<5) /* Numeric Error */ +#define X86_CR0_WP (1UL<<16) /* Write Protect */ +#define X86_CR0_AM (1UL<<18) /* Alignment Mask */ +#define X86_CR0_NW (1UL<<29) /* Not Write-through */ +#define X86_CR0_CD (1UL<<30) /* Cache Disable */ +#define X86_CR0_PG (1UL<<31) /* Paging */ + +/* + * CPU model specific register (MSR) numbers. + */ + +/* x86-64 specific MSRs */ +#define MSR_EFER 0xc0000080 /* extended feature register */ +#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ +#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ +#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ +#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ +#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ +#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ +#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ +#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ + +/* EFER bits: */ +#define EFER_SCE (1<<0) /* SYSCALL/SYSRET */ +#define EFER_LME (1<<8) /* Long mode enable */ +#define EFER_LMA (1<<10) /* Long mode active (read-only) */ +#define EFER_NX (1<<11) /* No execute enable */ +#define EFER_SVME (1<<12) /* Enable virtualization */ +#define EFER_LMSLE (1<<13) /* Long Mode Segment Limit Enable */ +#define EFER_FFXSR (1<<14) /* Enable Fast FXSAVE/FXRSTOR */ + +/* Intel MSRs. Some also available on other CPUs */ + +#define MSR_PPIN_CTL 0x0000004e +#define MSR_PPIN 0x0000004f + +#define MSR_IA32_PERFCTR0 0x000000c1 +#define MSR_IA32_PERFCTR1 0x000000c2 +#define MSR_FSB_FREQ 0x000000cd +#define MSR_PLATFORM_INFO 0x000000ce +#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31 +#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT) + +#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 +#define NHM_C3_AUTO_DEMOTE (1UL << 25) +#define NHM_C1_AUTO_DEMOTE (1UL << 26) +#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) +#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) + +#define MSR_MTRRcap 0x000000fe +#define MSR_IA32_BBL_CR_CTL 0x00000119 +#define MSR_IA32_BBL_CR_CTL3 0x0000011e + +#define MSR_IA32_SYSENTER_CS 0x00000174 +#define MSR_IA32_SYSENTER_ESP 0x00000175 +#define MSR_IA32_SYSENTER_EIP 0x00000176 + +#define MSR_IA32_MCG_CAP 0x00000179 +#define MSR_IA32_MCG_STATUS 0x0000017a +#define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_IA32_MCG_EXT_CTL 0x000004d0 + +#define MSR_OFFCORE_RSP_0 0x000001a6 +#define MSR_OFFCORE_RSP_1 0x000001a7 +#define MSR_TURBO_RATIO_LIMIT 0x000001ad +#define MSR_TURBO_RATIO_LIMIT1 0x000001ae +#define MSR_TURBO_RATIO_LIMIT2 0x000001af + +#define MSR_LBR_SELECT 0x000001c8 +#define MSR_LBR_TOS 0x000001c9 +#define MSR_LBR_NHM_FROM 0x00000680 +#define MSR_LBR_NHM_TO 0x000006c0 +#define MSR_LBR_CORE_FROM 0x00000040 +#define MSR_LBR_CORE_TO 0x00000060 + +#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */ +#define LBR_INFO_MISPRED BIT_ULL(63) +#define LBR_INFO_IN_TX BIT_ULL(62) +#define LBR_INFO_ABORT BIT_ULL(61) +#define LBR_INFO_CYCLES 0xffff + +#define MSR_IA32_PEBS_ENABLE 0x000003f1 +#define MSR_IA32_DS_AREA 0x00000600 +#define MSR_IA32_PERF_CAPABILITIES 0x00000345 +#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 + +#define MSR_IA32_RTIT_CTL 0x00000570 +#define MSR_IA32_RTIT_STATUS 0x00000571 +#define MSR_IA32_RTIT_ADDR0_A 0x00000580 +#define MSR_IA32_RTIT_ADDR0_B 0x00000581 +#define MSR_IA32_RTIT_ADDR1_A 0x00000582 +#define MSR_IA32_RTIT_ADDR1_B 0x00000583 +#define MSR_IA32_RTIT_ADDR2_A 0x00000584 +#define MSR_IA32_RTIT_ADDR2_B 0x00000585 +#define MSR_IA32_RTIT_ADDR3_A 0x00000586 +#define MSR_IA32_RTIT_ADDR3_B 0x00000587 +#define MSR_IA32_RTIT_CR3_MATCH 0x00000572 +#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560 +#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561 + +#define MSR_MTRRfix64K_00000 0x00000250 +#define MSR_MTRRfix16K_80000 0x00000258 +#define MSR_MTRRfix16K_A0000 0x00000259 +#define MSR_MTRRfix4K_C0000 0x00000268 +#define MSR_MTRRfix4K_C8000 0x00000269 +#define MSR_MTRRfix4K_D0000 0x0000026a +#define MSR_MTRRfix4K_D8000 0x0000026b +#define MSR_MTRRfix4K_E0000 0x0000026c +#define MSR_MTRRfix4K_E8000 0x0000026d +#define MSR_MTRRfix4K_F0000 0x0000026e +#define MSR_MTRRfix4K_F8000 0x0000026f +#define MSR_MTRRdefType 0x000002ff + +#define MSR_IA32_CR_PAT 0x00000277 + +#define MSR_IA32_DEBUGCTLMSR 0x000001d9 +#define MSR_IA32_LASTBRANCHFROMIP 0x000001db +#define MSR_IA32_LASTBRANCHTOIP 0x000001dc +#define MSR_IA32_LASTINTFROMIP 0x000001dd +#define MSR_IA32_LASTINTTOIP 0x000001de + +/* DEBUGCTLMSR bits (others vary by model): */ +#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_BTF_SHIFT 1 +#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_TR (1UL << 6) +#define DEBUGCTLMSR_BTS (1UL << 7) +#define DEBUGCTLMSR_BTINT (1UL << 8) +#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) +#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) +#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) +#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 +#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) + +#define MSR_PEBS_FRONTEND 0x000003f7 + +#define MSR_IA32_POWER_CTL 0x000001fc + +#define MSR_IA32_MC0_CTL 0x00000400 +#define MSR_IA32_MC0_STATUS 0x00000401 +#define MSR_IA32_MC0_ADDR 0x00000402 +#define MSR_IA32_MC0_MISC 0x00000403 + +/* C-state Residency Counters */ +#define MSR_PKG_C3_RESIDENCY 0x000003f8 +#define MSR_PKG_C6_RESIDENCY 0x000003f9 +#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa +#define MSR_PKG_C7_RESIDENCY 0x000003fa +#define MSR_CORE_C3_RESIDENCY 0x000003fc +#define MSR_CORE_C6_RESIDENCY 0x000003fd +#define MSR_CORE_C7_RESIDENCY 0x000003fe +#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff +#define MSR_PKG_C2_RESIDENCY 0x0000060d +#define MSR_PKG_C8_RESIDENCY 0x00000630 +#define MSR_PKG_C9_RESIDENCY 0x00000631 +#define MSR_PKG_C10_RESIDENCY 0x00000632 + +/* Interrupt Response Limit */ +#define MSR_PKGC3_IRTL 0x0000060a +#define MSR_PKGC6_IRTL 0x0000060b +#define MSR_PKGC7_IRTL 0x0000060c +#define MSR_PKGC8_IRTL 0x00000633 +#define MSR_PKGC9_IRTL 0x00000634 +#define MSR_PKGC10_IRTL 0x00000635 + +/* Run Time Average Power Limiting (RAPL) Interface */ + +#define MSR_RAPL_POWER_UNIT 0x00000606 + +#define MSR_PKG_POWER_LIMIT 0x00000610 +#define MSR_PKG_ENERGY_STATUS 0x00000611 +#define MSR_PKG_PERF_STATUS 0x00000613 +#define MSR_PKG_POWER_INFO 0x00000614 + +#define MSR_DRAM_POWER_LIMIT 0x00000618 +#define MSR_DRAM_ENERGY_STATUS 0x00000619 +#define MSR_DRAM_PERF_STATUS 0x0000061b +#define MSR_DRAM_POWER_INFO 0x0000061c + +#define MSR_PP0_POWER_LIMIT 0x00000638 +#define MSR_PP0_ENERGY_STATUS 0x00000639 +#define MSR_PP0_POLICY 0x0000063a +#define MSR_PP0_PERF_STATUS 0x0000063b + +#define MSR_PP1_POWER_LIMIT 0x00000640 +#define MSR_PP1_ENERGY_STATUS 0x00000641 +#define MSR_PP1_POLICY 0x00000642 + +/* Config TDP MSRs */ +#define MSR_CONFIG_TDP_NOMINAL 0x00000648 +#define MSR_CONFIG_TDP_LEVEL_1 0x00000649 +#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A +#define MSR_CONFIG_TDP_CONTROL 0x0000064B +#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C + +#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D + +#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 +#define MSR_PKG_ANY_CORE_C0_RES 0x00000659 +#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A +#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B + +#define MSR_CORE_C1_RES 0x00000660 +#define MSR_MODULE_C6_RES_MS 0x00000664 + +#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 +#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 + +#define MSR_ATOM_CORE_RATIOS 0x0000066a +#define MSR_ATOM_CORE_VIDS 0x0000066b +#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c +#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d + + +#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 +#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 +#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 + +/* Hardware P state interface */ +#define MSR_PPERF 0x0000064e +#define MSR_PERF_LIMIT_REASONS 0x0000064f +#define MSR_PM_ENABLE 0x00000770 +#define MSR_HWP_CAPABILITIES 0x00000771 +#define MSR_HWP_REQUEST_PKG 0x00000772 +#define MSR_HWP_INTERRUPT 0x00000773 +#define MSR_HWP_REQUEST 0x00000774 +#define MSR_HWP_STATUS 0x00000777 + +/* CPUID.6.EAX */ +#define HWP_BASE_BIT (1<<7) +#define HWP_NOTIFICATIONS_BIT (1<<8) +#define HWP_ACTIVITY_WINDOW_BIT (1<<9) +#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10) +#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) + +/* IA32_HWP_CAPABILITIES */ +#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff) +#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff) +#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff) +#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) + +/* IA32_HWP_REQUEST */ +#define HWP_MIN_PERF(x) (x & 0xff) +#define HWP_MAX_PERF(x) ((x & 0xff) << 8) +#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) +#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) +#define HWP_EPP_PERFORMANCE 0x00 +#define HWP_EPP_BALANCE_PERFORMANCE 0x80 +#define HWP_EPP_BALANCE_POWERSAVE 0xC0 +#define HWP_EPP_POWERSAVE 0xFF +#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) + +/* IA32_HWP_STATUS */ +#define HWP_GUARANTEED_CHANGE(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4) + +/* IA32_HWP_INTERRUPT */ +#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2) + +#define MSR_AMD64_MC0_MASK 0xc0010044 + +#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) +#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x)) +#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) +#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) + +#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x)) + +/* These are consecutive and not in the normal 4er MCE bank block */ +#define MSR_IA32_MC0_CTL2 0x00000280 +#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) + +#define MSR_P6_PERFCTR0 0x000000c1 +#define MSR_P6_PERFCTR1 0x000000c2 +#define MSR_P6_EVNTSEL0 0x00000186 +#define MSR_P6_EVNTSEL1 0x00000187 + +#define MSR_KNC_PERFCTR0 0x00000020 +#define MSR_KNC_PERFCTR1 0x00000021 +#define MSR_KNC_EVNTSEL0 0x00000028 +#define MSR_KNC_EVNTSEL1 0x00000029 + +/* Alternative perfctr range with full access. */ +#define MSR_IA32_PMC0 0x000004c1 + +/* AMD64 MSRs. Not complete. See the architecture manual for a more + complete list. */ + +#define MSR_AMD64_PATCH_LEVEL 0x0000008b +#define MSR_AMD64_TSC_RATIO 0xc0000104 +#define MSR_AMD64_NB_CFG 0xc001001f +#define MSR_AMD64_PATCH_LOADER 0xc0010020 +#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 +#define MSR_AMD64_OSVW_STATUS 0xc0010141 +#define MSR_AMD64_LS_CFG 0xc0011020 +#define MSR_AMD64_DC_CFG 0xc0011022 +#define MSR_AMD64_BU_CFG2 0xc001102a +#define MSR_AMD64_IBSFETCHCTL 0xc0011030 +#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 +#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 +#define MSR_AMD64_IBSFETCH_REG_COUNT 3 +#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1) +#define MSR_AMD64_IBSOPCTL 0xc0011033 +#define MSR_AMD64_IBSOPRIP 0xc0011034 +#define MSR_AMD64_IBSOPDATA 0xc0011035 +#define MSR_AMD64_IBSOPDATA2 0xc0011036 +#define MSR_AMD64_IBSOPDATA3 0xc0011037 +#define MSR_AMD64_IBSDCLINAD 0xc0011038 +#define MSR_AMD64_IBSDCPHYSAD 0xc0011039 +#define MSR_AMD64_IBSOP_REG_COUNT 7 +#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1) +#define MSR_AMD64_IBSCTL 0xc001103a +#define MSR_AMD64_IBSBRTARGET 0xc001103b +#define MSR_AMD64_IBSOPDATA4 0xc001103d +#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_SEV 0xc0010131 +#define MSR_AMD64_SEV_ENABLED_BIT 0 +#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) + +/* Fam 17h MSRs */ +#define MSR_F17H_IRPERF 0xc00000e9 + +/* Fam 16h MSRs */ +#define MSR_F16H_L2I_PERF_CTL 0xc0010230 +#define MSR_F16H_L2I_PERF_CTR 0xc0010231 +#define MSR_F16H_DR1_ADDR_MASK 0xc0011019 +#define MSR_F16H_DR2_ADDR_MASK 0xc001101a +#define MSR_F16H_DR3_ADDR_MASK 0xc001101b +#define MSR_F16H_DR0_ADDR_MASK 0xc0011027 + +/* Fam 15h MSRs */ +#define MSR_F15H_PERF_CTL 0xc0010200 +#define MSR_F15H_PERF_CTR 0xc0010201 +#define MSR_F15H_NB_PERF_CTL 0xc0010240 +#define MSR_F15H_NB_PERF_CTR 0xc0010241 +#define MSR_F15H_PTSC 0xc0010280 +#define MSR_F15H_IC_CFG 0xc0011021 + +/* Fam 10h MSRs */ +#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 +#define FAM10H_MMIO_CONF_ENABLE (1<<0) +#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf +#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 +#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL +#define FAM10H_MMIO_CONF_BASE_SHIFT 20 +#define MSR_FAM10H_NODE_ID 0xc001100c +#define MSR_F10H_DECFG 0xc0011029 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) + +/* K8 MSRs */ +#define MSR_K8_TOP_MEM1 0xc001001a +#define MSR_K8_TOP_MEM2 0xc001001d +#define MSR_K8_SYSCFG 0xc0010010 +#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 +#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_K8_INT_PENDING_MSG 0xc0010055 +/* C1E active bits in int pending message */ +#define K8_INTP_C1E_ACTIVE_MASK 0x18000000 +#define MSR_K8_TSEG_ADDR 0xc0010112 +#define MSR_K8_TSEG_MASK 0xc0010113 +#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ +#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ +#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ + +/* K7 MSRs */ +#define MSR_K7_EVNTSEL0 0xc0010000 +#define MSR_K7_PERFCTR0 0xc0010004 +#define MSR_K7_EVNTSEL1 0xc0010001 +#define MSR_K7_PERFCTR1 0xc0010005 +#define MSR_K7_EVNTSEL2 0xc0010002 +#define MSR_K7_PERFCTR2 0xc0010006 +#define MSR_K7_EVNTSEL3 0xc0010003 +#define MSR_K7_PERFCTR3 0xc0010007 +#define MSR_K7_CLK_CTL 0xc001001b +#define MSR_K7_HWCR 0xc0010015 +#define MSR_K7_HWCR_SMMLOCK_BIT 0 +#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) +#define MSR_K7_FID_VID_CTL 0xc0010041 +#define MSR_K7_FID_VID_STATUS 0xc0010042 + +/* K6 MSRs */ +#define MSR_K6_WHCR 0xc0000082 +#define MSR_K6_UWCCR 0xc0000085 +#define MSR_K6_EPMR 0xc0000086 +#define MSR_K6_PSOR 0xc0000087 +#define MSR_K6_PFIR 0xc0000088 + +/* Centaur-Hauls/IDT defined MSRs. */ +#define MSR_IDT_FCR1 0x00000107 +#define MSR_IDT_FCR2 0x00000108 +#define MSR_IDT_FCR3 0x00000109 +#define MSR_IDT_FCR4 0x0000010a + +#define MSR_IDT_MCR0 0x00000110 +#define MSR_IDT_MCR1 0x00000111 +#define MSR_IDT_MCR2 0x00000112 +#define MSR_IDT_MCR3 0x00000113 +#define MSR_IDT_MCR4 0x00000114 +#define MSR_IDT_MCR5 0x00000115 +#define MSR_IDT_MCR6 0x00000116 +#define MSR_IDT_MCR7 0x00000117 +#define MSR_IDT_MCR_CTRL 0x00000120 + +/* VIA Cyrix defined MSRs*/ +#define MSR_VIA_FCR 0x00001107 +#define MSR_VIA_LONGHAUL 0x0000110a +#define MSR_VIA_RNG 0x0000110b +#define MSR_VIA_BCR2 0x00001147 + +/* Transmeta defined MSRs */ +#define MSR_TMTA_LONGRUN_CTRL 0x80868010 +#define MSR_TMTA_LONGRUN_FLAGS 0x80868011 +#define MSR_TMTA_LRTI_READOUT 0x80868018 +#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a + +/* Intel defined MSRs. */ +#define MSR_IA32_P5_MC_ADDR 0x00000000 +#define MSR_IA32_P5_MC_TYPE 0x00000001 +#define MSR_IA32_TSC 0x00000010 +#define MSR_IA32_PLATFORM_ID 0x00000017 +#define MSR_IA32_EBL_CR_POWERON 0x0000002a +#define MSR_EBC_FREQUENCY_ID 0x0000002c +#define MSR_SMI_COUNT 0x00000034 +#define MSR_IA32_FEATURE_CONTROL 0x0000003a +#define MSR_IA32_TSC_ADJUST 0x0000003b +#define MSR_IA32_BNDCFGS 0x00000d90 + +#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc + +#define MSR_IA32_XSS 0x00000da0 + +#define FEATURE_CONTROL_LOCKED (1<<0) +#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) +#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) +#define FEATURE_CONTROL_LMCE (1<<20) + +#define MSR_IA32_APICBASE 0x0000001b +#define MSR_IA32_APICBASE_BSP (1<<8) +#define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_BASE (0xfffff<<12) + +#define MSR_IA32_TSCDEADLINE 0x000006e0 + +#define MSR_IA32_UCODE_WRITE 0x00000079 +#define MSR_IA32_UCODE_REV 0x0000008b + +#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b +#define MSR_IA32_SMBASE 0x0000009e + +#define MSR_IA32_PERF_STATUS 0x00000198 +#define MSR_IA32_PERF_CTL 0x00000199 +#define INTEL_PERF_CTL_MASK 0xffff +#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD_PERF_STATUS 0xc0010063 +#define MSR_AMD_PERF_CTL 0xc0010062 + +#define MSR_IA32_MPERF 0x000000e7 +#define MSR_IA32_APERF 0x000000e8 + +#define MSR_IA32_THERM_CONTROL 0x0000019a +#define MSR_IA32_THERM_INTERRUPT 0x0000019b + +#define THERM_INT_HIGH_ENABLE (1 << 0) +#define THERM_INT_LOW_ENABLE (1 << 1) +#define THERM_INT_PLN_ENABLE (1 << 24) + +#define MSR_IA32_THERM_STATUS 0x0000019c + +#define THERM_STATUS_PROCHOT (1 << 0) +#define THERM_STATUS_POWER_LIMIT (1 << 10) + +#define MSR_THERM2_CTL 0x0000019d + +#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16) + +#define MSR_IA32_MISC_ENABLE 0x000001a0 + +#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 + +#define MSR_MISC_FEATURE_CONTROL 0x000001a4 +#define MSR_MISC_PWR_MGMT 0x000001aa + +#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 +#define ENERGY_PERF_BIAS_PERFORMANCE 0 +#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4 +#define ENERGY_PERF_BIAS_NORMAL 6 +#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8 +#define ENERGY_PERF_BIAS_POWERSAVE 15 + +#define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1 + +#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0) +#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10) + +#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2 + +#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0) +#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) +#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) + +/* Thermal Thresholds Support */ +#define THERM_INT_THRESHOLD0_ENABLE (1 << 15) +#define THERM_SHIFT_THRESHOLD0 8 +#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0) +#define THERM_INT_THRESHOLD1_ENABLE (1 << 23) +#define THERM_SHIFT_THRESHOLD1 16 +#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1) +#define THERM_STATUS_THRESHOLD0 (1 << 6) +#define THERM_LOG_THRESHOLD0 (1 << 7) +#define THERM_STATUS_THRESHOLD1 (1 << 8) +#define THERM_LOG_THRESHOLD1 (1 << 9) + +/* MISC_ENABLE bits: architectural */ +#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0 +#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) +#define MSR_IA32_MISC_ENABLE_TCC_BIT 1 +#define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT) +#define MSR_IA32_MISC_ENABLE_EMON_BIT 7 +#define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT) +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11 +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT) +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12 +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT) +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16 +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT) +#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18 +#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT) +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22 +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23 +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34 +#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT) + +/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */ +#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2 +#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT) +#define MSR_IA32_MISC_ENABLE_TM1_BIT 3 +#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT) +#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4 +#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6 +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8 +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT) +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9 +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_FERR_BIT 10 +#define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT) +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10 +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT) +#define MSR_IA32_MISC_ENABLE_TM2_BIT 13 +#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT) +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19 +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20 +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT) +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24 +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT) +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37 +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38 +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39 +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT) + +/* MISC_FEATURES_ENABLES non-architectural features */ +#define MSR_MISC_FEATURES_ENABLES 0x00000140 + +#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0 +#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT) +#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1 + +#define MSR_IA32_TSC_DEADLINE 0x000006E0 + +/* P4/Xeon+ specific */ +#define MSR_IA32_MCG_EAX 0x00000180 +#define MSR_IA32_MCG_EBX 0x00000181 +#define MSR_IA32_MCG_ECX 0x00000182 +#define MSR_IA32_MCG_EDX 0x00000183 +#define MSR_IA32_MCG_ESI 0x00000184 +#define MSR_IA32_MCG_EDI 0x00000185 +#define MSR_IA32_MCG_EBP 0x00000186 +#define MSR_IA32_MCG_ESP 0x00000187 +#define MSR_IA32_MCG_EFLAGS 0x00000188 +#define MSR_IA32_MCG_EIP 0x00000189 +#define MSR_IA32_MCG_RESERVED 0x0000018a + +/* Pentium IV performance counter MSRs */ +#define MSR_P4_BPU_PERFCTR0 0x00000300 +#define MSR_P4_BPU_PERFCTR1 0x00000301 +#define MSR_P4_BPU_PERFCTR2 0x00000302 +#define MSR_P4_BPU_PERFCTR3 0x00000303 +#define MSR_P4_MS_PERFCTR0 0x00000304 +#define MSR_P4_MS_PERFCTR1 0x00000305 +#define MSR_P4_MS_PERFCTR2 0x00000306 +#define MSR_P4_MS_PERFCTR3 0x00000307 +#define MSR_P4_FLAME_PERFCTR0 0x00000308 +#define MSR_P4_FLAME_PERFCTR1 0x00000309 +#define MSR_P4_FLAME_PERFCTR2 0x0000030a +#define MSR_P4_FLAME_PERFCTR3 0x0000030b +#define MSR_P4_IQ_PERFCTR0 0x0000030c +#define MSR_P4_IQ_PERFCTR1 0x0000030d +#define MSR_P4_IQ_PERFCTR2 0x0000030e +#define MSR_P4_IQ_PERFCTR3 0x0000030f +#define MSR_P4_IQ_PERFCTR4 0x00000310 +#define MSR_P4_IQ_PERFCTR5 0x00000311 +#define MSR_P4_BPU_CCCR0 0x00000360 +#define MSR_P4_BPU_CCCR1 0x00000361 +#define MSR_P4_BPU_CCCR2 0x00000362 +#define MSR_P4_BPU_CCCR3 0x00000363 +#define MSR_P4_MS_CCCR0 0x00000364 +#define MSR_P4_MS_CCCR1 0x00000365 +#define MSR_P4_MS_CCCR2 0x00000366 +#define MSR_P4_MS_CCCR3 0x00000367 +#define MSR_P4_FLAME_CCCR0 0x00000368 +#define MSR_P4_FLAME_CCCR1 0x00000369 +#define MSR_P4_FLAME_CCCR2 0x0000036a +#define MSR_P4_FLAME_CCCR3 0x0000036b +#define MSR_P4_IQ_CCCR0 0x0000036c +#define MSR_P4_IQ_CCCR1 0x0000036d +#define MSR_P4_IQ_CCCR2 0x0000036e +#define MSR_P4_IQ_CCCR3 0x0000036f +#define MSR_P4_IQ_CCCR4 0x00000370 +#define MSR_P4_IQ_CCCR5 0x00000371 +#define MSR_P4_ALF_ESCR0 0x000003ca +#define MSR_P4_ALF_ESCR1 0x000003cb +#define MSR_P4_BPU_ESCR0 0x000003b2 +#define MSR_P4_BPU_ESCR1 0x000003b3 +#define MSR_P4_BSU_ESCR0 0x000003a0 +#define MSR_P4_BSU_ESCR1 0x000003a1 +#define MSR_P4_CRU_ESCR0 0x000003b8 +#define MSR_P4_CRU_ESCR1 0x000003b9 +#define MSR_P4_CRU_ESCR2 0x000003cc +#define MSR_P4_CRU_ESCR3 0x000003cd +#define MSR_P4_CRU_ESCR4 0x000003e0 +#define MSR_P4_CRU_ESCR5 0x000003e1 +#define MSR_P4_DAC_ESCR0 0x000003a8 +#define MSR_P4_DAC_ESCR1 0x000003a9 +#define MSR_P4_FIRM_ESCR0 0x000003a4 +#define MSR_P4_FIRM_ESCR1 0x000003a5 +#define MSR_P4_FLAME_ESCR0 0x000003a6 +#define MSR_P4_FLAME_ESCR1 0x000003a7 +#define MSR_P4_FSB_ESCR0 0x000003a2 +#define MSR_P4_FSB_ESCR1 0x000003a3 +#define MSR_P4_IQ_ESCR0 0x000003ba +#define MSR_P4_IQ_ESCR1 0x000003bb +#define MSR_P4_IS_ESCR0 0x000003b4 +#define MSR_P4_IS_ESCR1 0x000003b5 +#define MSR_P4_ITLB_ESCR0 0x000003b6 +#define MSR_P4_ITLB_ESCR1 0x000003b7 +#define MSR_P4_IX_ESCR0 0x000003c8 +#define MSR_P4_IX_ESCR1 0x000003c9 +#define MSR_P4_MOB_ESCR0 0x000003aa +#define MSR_P4_MOB_ESCR1 0x000003ab +#define MSR_P4_MS_ESCR0 0x000003c0 +#define MSR_P4_MS_ESCR1 0x000003c1 +#define MSR_P4_PMH_ESCR0 0x000003ac +#define MSR_P4_PMH_ESCR1 0x000003ad +#define MSR_P4_RAT_ESCR0 0x000003bc +#define MSR_P4_RAT_ESCR1 0x000003bd +#define MSR_P4_SAAT_ESCR0 0x000003ae +#define MSR_P4_SAAT_ESCR1 0x000003af +#define MSR_P4_SSU_ESCR0 0x000003be +#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */ + +#define MSR_P4_TBPU_ESCR0 0x000003c2 +#define MSR_P4_TBPU_ESCR1 0x000003c3 +#define MSR_P4_TC_ESCR0 0x000003c4 +#define MSR_P4_TC_ESCR1 0x000003c5 +#define MSR_P4_U2L_ESCR0 0x000003b0 +#define MSR_P4_U2L_ESCR1 0x000003b1 + +#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2 + +/* Intel Core-based CPU performance counters */ +#define MSR_CORE_PERF_FIXED_CTR0 0x00000309 +#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a +#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b +#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d +#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e +#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 + +/* Geode defined MSRs */ +#define MSR_GEODE_BUSCONT_CONF0 0x00001900 + +/* Intel VT MSRs */ +#define MSR_IA32_VMX_BASIC 0x00000480 +#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 +#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 +#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 +#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 +#define MSR_IA32_VMX_MISC 0x00000485 +#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 +#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 +#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 +#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 +#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a +#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b +#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c +#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d +#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e +#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f +#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 +#define MSR_IA32_VMX_VMFUNC 0x00000491 + +/* VMX_BASIC bits and bitmasks */ +#define VMX_BASIC_VMCS_SIZE_SHIFT 32 +#define VMX_BASIC_TRUE_CTLS (1ULL << 55) +#define VMX_BASIC_64 0x0001000000000000LLU +#define VMX_BASIC_MEM_TYPE_SHIFT 50 +#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU +#define VMX_BASIC_MEM_TYPE_WB 6LLU +#define VMX_BASIC_INOUT 0x0040000000000000LLU + +/* MSR_IA32_VMX_MISC bits */ +#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F +/* AMD-V MSRs */ + +#define MSR_VM_CR 0xc0010114 +#define MSR_VM_IGNNE 0xc0010115 +#define MSR_VM_HSAVE_PA 0xc0010117 + +#endif /* !SELFTEST_KVM_X86_H */ diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c new file mode 100644 index 000000000000..c9f5b7d4ce38 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -0,0 +1,87 @@ +/* + * tools/testing/selftests/kvm/lib/assert.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/ + +#include "test_util.h" + +#include <execinfo.h> +#include <sys/syscall.h> + +/* Dumps the current stack trace to stderr. */ +static void __attribute__((noinline)) test_dump_stack(void); +static void test_dump_stack(void) +{ + /* + * Build and run this command: + * + * addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \ + * grep -v test_dump_stack | cat -n 1>&2 + * + * Note that the spacing is different and there's no newline. + */ + size_t i; + size_t n = 20; + void *stack[n]; + const char *addr2line = "addr2line -s -e /proc/$PPID/exe -fpai"; + const char *pipeline = "|cat -n 1>&2"; + char cmd[strlen(addr2line) + strlen(pipeline) + + /* N bytes per addr * 2 digits per byte + 1 space per addr: */ + n * (((sizeof(void *)) * 2) + 1) + + /* Null terminator: */ + 1]; + char *c; + + n = backtrace(stack, n); + c = &cmd[0]; + c += sprintf(c, "%s", addr2line); + /* + * Skip the first 3 frames: backtrace, test_dump_stack, and + * test_assert. We hope that backtrace isn't inlined and the other two + * we've declared noinline. + */ + for (i = 2; i < n; i++) + c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1); + c += sprintf(c, "%s", pipeline); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-result" + system(cmd); +#pragma GCC diagnostic pop +} + +static pid_t gettid(void) +{ + return syscall(SYS_gettid); +} + +void __attribute__((noinline)) +test_assert(bool exp, const char *exp_str, + const char *file, unsigned int line, const char *fmt, ...) +{ + va_list ap; + + if (!(exp)) { + va_start(ap, fmt); + + fprintf(stderr, "==== Test Assertion Failure ====\n" + " %s:%u: %s\n" + " pid=%d tid=%d\n", + file, line, exp_str, getpid(), gettid()); + test_dump_stack(); + if (fmt) { + fputs(" ", stderr); + vfprintf(stderr, fmt, ap); + fputs("\n", stderr); + } + va_end(ap); + + exit(254); + } + + return; +} diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c new file mode 100644 index 000000000000..5eb857584aa3 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -0,0 +1,197 @@ +/* + * tools/testing/selftests/kvm/lib/elf.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#include "test_util.h" + +#include <bits/endian.h> +#include <linux/elf.h> + +#include "kvm_util.h" +#include "kvm_util_internal.h" + +static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp) +{ + off_t offset_rv; + + /* Open the ELF file. */ + int fd; + fd = open(filename, O_RDONLY); + TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n" + " filename: %s\n" + " rv: %i errno: %i", filename, fd, errno); + + /* Read in and validate ELF Identification Record. + * The ELF Identification record is the first 16 (EI_NIDENT) bytes + * of the ELF header, which is at the beginning of the ELF file. + * For now it is only safe to read the first EI_NIDENT bytes. Once + * read and validated, the value of e_ehsize can be used to determine + * the real size of the ELF header. + */ + unsigned char ident[EI_NIDENT]; + test_read(fd, ident, sizeof(ident)); + TEST_ASSERT((ident[EI_MAG0] == ELFMAG0) && (ident[EI_MAG1] == ELFMAG1) + && (ident[EI_MAG2] == ELFMAG2) && (ident[EI_MAG3] == ELFMAG3), + "ELF MAGIC Mismatch,\n" + " filename: %s\n" + " ident[EI_MAG0 - EI_MAG3]: %02x %02x %02x %02x\n" + " Expected: %02x %02x %02x %02x", + filename, + ident[EI_MAG0], ident[EI_MAG1], ident[EI_MAG2], ident[EI_MAG3], + ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3); + TEST_ASSERT(ident[EI_CLASS] == ELFCLASS64, + "Current implementation only able to handle ELFCLASS64,\n" + " filename: %s\n" + " ident[EI_CLASS]: %02x\n" + " expected: %02x", + filename, + ident[EI_CLASS], ELFCLASS64); + TEST_ASSERT(((BYTE_ORDER == LITTLE_ENDIAN) + && (ident[EI_DATA] == ELFDATA2LSB)) + || ((BYTE_ORDER == BIG_ENDIAN) + && (ident[EI_DATA] == ELFDATA2MSB)), "Current " + "implementation only able to handle\n" + "cases where the host and ELF file endianness\n" + "is the same:\n" + " host BYTE_ORDER: %u\n" + " host LITTLE_ENDIAN: %u\n" + " host BIG_ENDIAN: %u\n" + " ident[EI_DATA]: %u\n" + " ELFDATA2LSB: %u\n" + " ELFDATA2MSB: %u", + BYTE_ORDER, LITTLE_ENDIAN, BIG_ENDIAN, + ident[EI_DATA], ELFDATA2LSB, ELFDATA2MSB); + TEST_ASSERT(ident[EI_VERSION] == EV_CURRENT, + "Current implementation only able to handle current " + "ELF version,\n" + " filename: %s\n" + " ident[EI_VERSION]: %02x\n" + " expected: %02x", + filename, ident[EI_VERSION], EV_CURRENT); + + /* Read in the ELF header. + * With the ELF Identification portion of the ELF header + * validated, especially that the value at EI_VERSION is + * as expected, it is now safe to read the entire ELF header. + */ + offset_rv = lseek(fd, 0, SEEK_SET); + TEST_ASSERT(offset_rv == 0, "Seek to ELF header failed,\n" + " rv: %zi expected: %i", offset_rv, 0); + test_read(fd, hdrp, sizeof(*hdrp)); + TEST_ASSERT(hdrp->e_phentsize == sizeof(Elf64_Phdr), + "Unexpected physical header size,\n" + " hdrp->e_phentsize: %x\n" + " expected: %zx", + hdrp->e_phentsize, sizeof(Elf64_Phdr)); + TEST_ASSERT(hdrp->e_shentsize == sizeof(Elf64_Shdr), + "Unexpected section header size,\n" + " hdrp->e_shentsize: %x\n" + " expected: %zx", + hdrp->e_shentsize, sizeof(Elf64_Shdr)); +} + +/* VM ELF Load + * + * Input Args: + * filename - Path to ELF file + * + * Output Args: None + * + * Input/Output Args: + * vm - Pointer to opaque type that describes the VM. + * + * Return: None, TEST_ASSERT failures for all error conditions + * + * Loads the program image of the ELF file specified by filename, + * into the virtual address space of the VM pointed to by vm. On entry + * the VM needs to not be using any of the virtual address space used + * by the image and it needs to have sufficient available physical pages, to + * back the virtual pages used to load the image. + */ +void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename, + uint32_t data_memslot, uint32_t pgd_memslot) +{ + off_t offset, offset_rv; + Elf64_Ehdr hdr; + + /* Open the ELF file. */ + int fd; + fd = open(filename, O_RDONLY); + TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n" + " filename: %s\n" + " rv: %i errno: %i", filename, fd, errno); + + /* Read in the ELF header. */ + elfhdr_get(filename, &hdr); + + /* For each program header. + * The following ELF header members specify the location + * and size of the program headers: + * + * e_phoff - File offset to start of program headers + * e_phentsize - Size of each program header + * e_phnum - Number of program header entries + */ + for (unsigned int n1 = 0; n1 < hdr.e_phnum; n1++) { + /* Seek to the beginning of the program header. */ + offset = hdr.e_phoff + (n1 * hdr.e_phentsize); + offset_rv = lseek(fd, offset, SEEK_SET); + TEST_ASSERT(offset_rv == offset, + "Failed to seek to begining of program header %u,\n" + " filename: %s\n" + " rv: %jd errno: %i", + n1, filename, (intmax_t) offset_rv, errno); + + /* Read in the program header. */ + Elf64_Phdr phdr; + test_read(fd, &phdr, sizeof(phdr)); + + /* Skip if this header doesn't describe a loadable segment. */ + if (phdr.p_type != PT_LOAD) + continue; + + /* Allocate memory for this segment within the VM. */ + TEST_ASSERT(phdr.p_memsz > 0, "Unexpected loadable segment " + "memsize of 0,\n" + " phdr index: %u p_memsz: 0x%" PRIx64, + n1, (uint64_t) phdr.p_memsz); + vm_vaddr_t seg_vstart = phdr.p_vaddr; + seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1); + vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1; + seg_vend |= vm->page_size - 1; + size_t seg_size = seg_vend - seg_vstart + 1; + + vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart, + data_memslot, pgd_memslot); + TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate " + "virtual memory for segment at requested min addr,\n" + " segment idx: %u\n" + " seg_vstart: 0x%lx\n" + " vaddr: 0x%lx", + n1, seg_vstart, vaddr); + memset(addr_gva2hva(vm, vaddr), 0, seg_size); + /* TODO(lhuemill): Set permissions of each memory segment + * based on the least-significant 3 bits of phdr.p_flags. + */ + + /* Load portion of initial state that is contained within + * the ELF file. + */ + if (phdr.p_filesz) { + offset_rv = lseek(fd, phdr.p_offset, SEEK_SET); + TEST_ASSERT(offset_rv == phdr.p_offset, + "Seek to program segment offset failed,\n" + " program header idx: %u errno: %i\n" + " offset_rv: 0x%jx\n" + " expected: 0x%jx\n", + n1, errno, (intmax_t) offset_rv, + (intmax_t) phdr.p_offset); + test_read(fd, addr_gva2hva(vm, phdr.p_vaddr), + phdr.p_filesz); + } + } +} diff --git a/tools/testing/selftests/kvm/lib/io.c b/tools/testing/selftests/kvm/lib/io.c new file mode 100644 index 000000000000..cff869ffe6ee --- /dev/null +++ b/tools/testing/selftests/kvm/lib/io.c @@ -0,0 +1,158 @@ +/* + * tools/testing/selftests/kvm/lib/io.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#include "test_util.h" + +/* Test Write + * + * A wrapper for write(2), that automatically handles the following + * special conditions: + * + * + Interrupted system call (EINTR) + * + Write of less than requested amount + * + Non-block return (EAGAIN) + * + * For each of the above, an additional write is performed to automatically + * continue writing the requested data. + * There are also many cases where write(2) can return an unexpected + * error (e.g. EIO). Such errors cause a TEST_ASSERT failure. + * + * Note, for function signature compatibility with write(2), this function + * returns the number of bytes written, but that value will always be equal + * to the number of requested bytes. All other conditions in this and + * future enhancements to this function either automatically issue another + * write(2) or cause a TEST_ASSERT failure. + * + * Args: + * fd - Opened file descriptor to file to be written. + * count - Number of bytes to write. + * + * Output: + * buf - Starting address of data to be written. + * + * Return: + * On success, number of bytes written. + * On failure, a TEST_ASSERT failure is caused. + */ +ssize_t test_write(int fd, const void *buf, size_t count) +{ + ssize_t rc; + ssize_t num_written = 0; + size_t num_left = count; + const char *ptr = buf; + + /* Note: Count of zero is allowed (see "RETURN VALUE" portion of + * write(2) manpage for details. + */ + TEST_ASSERT(count >= 0, "Unexpected count, count: %li", count); + + do { + rc = write(fd, ptr, num_left); + + switch (rc) { + case -1: + TEST_ASSERT(errno == EAGAIN || errno == EINTR, + "Unexpected write failure,\n" + " rc: %zi errno: %i", rc, errno); + continue; + + case 0: + TEST_ASSERT(false, "Unexpected EOF,\n" + " rc: %zi num_written: %zi num_left: %zu", + rc, num_written, num_left); + break; + + default: + TEST_ASSERT(rc >= 0, "Unexpected ret from write,\n" + " rc: %zi errno: %i", rc, errno); + num_written += rc; + num_left -= rc; + ptr += rc; + break; + } + } while (num_written < count); + + return num_written; +} + +/* Test Read + * + * A wrapper for read(2), that automatically handles the following + * special conditions: + * + * + Interrupted system call (EINTR) + * + Read of less than requested amount + * + Non-block return (EAGAIN) + * + * For each of the above, an additional read is performed to automatically + * continue reading the requested data. + * There are also many cases where read(2) can return an unexpected + * error (e.g. EIO). Such errors cause a TEST_ASSERT failure. Note, + * it is expected that the file opened by fd at the current file position + * contains at least the number of requested bytes to be read. A TEST_ASSERT + * failure is produced if an End-Of-File condition occurs, before all the + * data is read. It is the callers responsibility to assure that sufficient + * data exists. + * + * Note, for function signature compatibility with read(2), this function + * returns the number of bytes read, but that value will always be equal + * to the number of requested bytes. All other conditions in this and + * future enhancements to this function either automatically issue another + * read(2) or cause a TEST_ASSERT failure. + * + * Args: + * fd - Opened file descriptor to file to be read. + * count - Number of bytes to read. + * + * Output: + * buf - Starting address of where to write the bytes read. + * + * Return: + * On success, number of bytes read. + * On failure, a TEST_ASSERT failure is caused. + */ +ssize_t test_read(int fd, void *buf, size_t count) +{ + ssize_t rc; + ssize_t num_read = 0; + size_t num_left = count; + char *ptr = buf; + + /* Note: Count of zero is allowed (see "If count is zero" portion of + * read(2) manpage for details. + */ + TEST_ASSERT(count >= 0, "Unexpected count, count: %li", count); + + do { + rc = read(fd, ptr, num_left); + + switch (rc) { + case -1: + TEST_ASSERT(errno == EAGAIN || errno == EINTR, + "Unexpected read failure,\n" + " rc: %zi errno: %i", rc, errno); + break; + + case 0: + TEST_ASSERT(false, "Unexpected EOF,\n" + " rc: %zi num_read: %zi num_left: %zu", + rc, num_read, num_left); + break; + + default: + TEST_ASSERT(rc > 0, "Unexpected ret from read,\n" + " rc: %zi errno: %i", rc, errno); + num_read += rc; + num_left -= rc; + ptr += rc; + break; + } + } while (num_read < count); + + return num_read; +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c new file mode 100644 index 000000000000..7ca1bb40c498 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -0,0 +1,1480 @@ +/* + * tools/testing/selftests/kvm/lib/kvm_util.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "kvm_util_internal.h" + +#include <assert.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> + +#define KVM_DEV_PATH "/dev/kvm" + +#define KVM_UTIL_PGS_PER_HUGEPG 512 +#define KVM_UTIL_MIN_PADDR 0x2000 + +/* Aligns x up to the next multiple of size. Size must be a power of 2. */ +static void *align(void *x, size_t size) +{ + size_t mask = size - 1; + TEST_ASSERT(size != 0 && !(size & (size - 1)), + "size not a power of 2: %lu", size); + return (void *) (((size_t) x + mask) & ~mask); +} + +/* Capability + * + * Input Args: + * cap - Capability + * + * Output Args: None + * + * Return: + * On success, the Value corresponding to the capability (KVM_CAP_*) + * specified by the value of cap. On failure a TEST_ASSERT failure + * is produced. + * + * Looks up and returns the value corresponding to the capability + * (KVM_CAP_*) given by cap. + */ +int kvm_check_cap(long cap) +{ + int ret; + int kvm_fd; + + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", + KVM_DEV_PATH, kvm_fd, errno); + + ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap); + TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n" + " rc: %i errno: %i", ret, errno); + + close(kvm_fd); + + return ret; +} + +/* VM Create + * + * Input Args: + * mode - VM Mode (e.g. VM_MODE_FLAT48PG) + * phy_pages - Physical memory pages + * perm - permission + * + * Output Args: None + * + * Return: + * Pointer to opaque structure that describes the created VM. + * + * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG). + * When phy_pages is non-zero, a memory region of phy_pages physical pages + * is created and mapped starting at guest physical address 0. The file + * descriptor to control the created VM is created with the permissions + * given by perm (e.g. O_RDWR). + */ +struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) +{ + struct kvm_vm *vm; + int kvm_fd; + + /* Allocate memory. */ + vm = calloc(1, sizeof(*vm)); + TEST_ASSERT(vm != NULL, "Insufficent Memory"); + + vm->mode = mode; + kvm_fd = open(KVM_DEV_PATH, perm); + TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", + KVM_DEV_PATH, kvm_fd, errno); + + /* Create VM. */ + vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL); + TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, " + "rc: %i errno: %i", vm->fd, errno); + + close(kvm_fd); + + /* Setup mode specific traits. */ + switch (vm->mode) { + case VM_MODE_FLAT48PG: + vm->page_size = 0x1000; + vm->page_shift = 12; + + /* Limit to 48-bit canonical virtual addresses. */ + vm->vpages_valid = sparsebit_alloc(); + sparsebit_set_num(vm->vpages_valid, + 0, (1ULL << (48 - 1)) >> vm->page_shift); + sparsebit_set_num(vm->vpages_valid, + (~((1ULL << (48 - 1)) - 1)) >> vm->page_shift, + (1ULL << (48 - 1)) >> vm->page_shift); + + /* Limit physical addresses to 52-bits. */ + vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1; + break; + + default: + TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); + } + + /* Allocate and setup memory for guest. */ + vm->vpages_mapped = sparsebit_alloc(); + if (phy_pages != 0) + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + 0, 0, phy_pages, 0); + + return vm; +} + +/* Userspace Memory Region Find + * + * Input Args: + * vm - Virtual Machine + * start - Starting VM physical address + * end - Ending VM physical address, inclusive. + * + * Output Args: None + * + * Return: + * Pointer to overlapping region, NULL if no such region. + * + * Searches for a region with any physical memory that overlaps with + * any portion of the guest physical addresses from start to end + * inclusive. If multiple overlapping regions exist, a pointer to any + * of the regions is returned. Null is returned only when no overlapping + * region exists. + */ +static struct userspace_mem_region *userspace_mem_region_find( + struct kvm_vm *vm, uint64_t start, uint64_t end) +{ + struct userspace_mem_region *region; + + for (region = vm->userspace_mem_region_head; region; + region = region->next) { + uint64_t existing_start = region->region.guest_phys_addr; + uint64_t existing_end = region->region.guest_phys_addr + + region->region.memory_size - 1; + if (start <= existing_end && end >= existing_start) + return region; + } + + return NULL; +} + +/* KVM Userspace Memory Region Find + * + * Input Args: + * vm - Virtual Machine + * start - Starting VM physical address + * end - Ending VM physical address, inclusive. + * + * Output Args: None + * + * Return: + * Pointer to overlapping region, NULL if no such region. + * + * Public interface to userspace_mem_region_find. Allows tests to look up + * the memslot datastructure for a given range of guest physical memory. + */ +struct kvm_userspace_memory_region * +kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, + uint64_t end) +{ + struct userspace_mem_region *region; + + region = userspace_mem_region_find(vm, start, end); + if (!region) + return NULL; + + return ®ion->region; +} + +/* VCPU Find + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: None + * + * Return: + * Pointer to VCPU structure + * + * Locates a vcpu structure that describes the VCPU specified by vcpuid and + * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU + * for the specified vcpuid. + */ +struct vcpu *vcpu_find(struct kvm_vm *vm, + uint32_t vcpuid) +{ + struct vcpu *vcpup; + + for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) { + if (vcpup->id == vcpuid) + return vcpup; + } + + return NULL; +} + +/* VM VCPU Remove + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: None + * + * Return: None, TEST_ASSERT failures for all error conditions + * + * Within the VM specified by vm, removes the VCPU given by vcpuid. + */ +static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + + int ret = close(vcpu->fd); + TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i " + "errno: %i", ret, errno); + + if (vcpu->next) + vcpu->next->prev = vcpu->prev; + if (vcpu->prev) + vcpu->prev->next = vcpu->next; + else + vm->vcpu_head = vcpu->next; + free(vcpu); +} + + +/* Destroys and frees the VM pointed to by vmp. + */ +void kvm_vm_free(struct kvm_vm *vmp) +{ + int ret; + + if (vmp == NULL) + return; + + /* Free userspace_mem_regions. */ + while (vmp->userspace_mem_region_head) { + struct userspace_mem_region *region + = vmp->userspace_mem_region_head; + + region->region.memory_size = 0; + ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, + ®ion->region); + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, " + "rc: %i errno: %i", ret, errno); + + vmp->userspace_mem_region_head = region->next; + sparsebit_free(®ion->unused_phy_pages); + ret = munmap(region->mmap_start, region->mmap_size); + TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", + ret, errno); + + free(region); + } + + /* Free VCPUs. */ + while (vmp->vcpu_head) + vm_vcpu_rm(vmp, vmp->vcpu_head->id); + + /* Free sparsebit arrays. */ + sparsebit_free(&vmp->vpages_valid); + sparsebit_free(&vmp->vpages_mapped); + + /* Close file descriptor for the VM. */ + ret = close(vmp->fd); + TEST_ASSERT(ret == 0, "Close of vm fd failed,\n" + " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno); + + /* Free the structure describing the VM. */ + free(vmp); +} + +/* Memory Compare, host virtual to guest virtual + * + * Input Args: + * hva - Starting host virtual address + * vm - Virtual Machine + * gva - Starting guest virtual address + * len - number of bytes to compare + * + * Output Args: None + * + * Input/Output Args: None + * + * Return: + * Returns 0 if the bytes starting at hva for a length of len + * are equal the guest virtual bytes starting at gva. Returns + * a value < 0, if bytes at hva are less than those at gva. + * Otherwise a value > 0 is returned. + * + * Compares the bytes starting at the host virtual address hva, for + * a length of len, to the guest bytes starting at the guest virtual + * address given by gva. + */ +int kvm_memcmp_hva_gva(void *hva, + struct kvm_vm *vm, vm_vaddr_t gva, size_t len) +{ + size_t amt; + + /* Compare a batch of bytes until either a match is found + * or all the bytes have been compared. + */ + for (uintptr_t offset = 0; offset < len; offset += amt) { + uintptr_t ptr1 = (uintptr_t)hva + offset; + + /* Determine host address for guest virtual address + * at offset. + */ + uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset); + + /* Determine amount to compare on this pass. + * Don't allow the comparsion to cross a page boundary. + */ + amt = len - offset; + if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift)) + amt = vm->page_size - (ptr1 % vm->page_size); + if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift)) + amt = vm->page_size - (ptr2 % vm->page_size); + + assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift)); + assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift)); + + /* Perform the comparison. If there is a difference + * return that result to the caller, otherwise need + * to continue on looking for a mismatch. + */ + int ret = memcmp((void *)ptr1, (void *)ptr2, amt); + if (ret != 0) + return ret; + } + + /* No mismatch found. Let the caller know the two memory + * areas are equal. + */ + return 0; +} + +/* Allocate an instance of struct kvm_cpuid2 + * + * Input Args: None + * + * Output Args: None + * + * Return: A pointer to the allocated struct. The caller is responsible + * for freeing this struct. + * + * Since kvm_cpuid2 uses a 0-length array to allow a the size of the + * array to be decided at allocation time, allocation is slightly + * complicated. This function uses a reasonable default length for + * the array and performs the appropriate allocation. + */ +struct kvm_cpuid2 *allocate_kvm_cpuid2(void) +{ + struct kvm_cpuid2 *cpuid; + int nent = 100; + size_t size; + + size = sizeof(*cpuid); + size += nent * sizeof(struct kvm_cpuid_entry2); + cpuid = malloc(size); + if (!cpuid) { + perror("malloc"); + abort(); + } + + cpuid->nent = nent; + + return cpuid; +} + +/* KVM Supported CPUID Get + * + * Input Args: None + * + * Output Args: + * cpuid - The supported KVM CPUID + * + * Return: void + * + * Get the guest CPUID supported by KVM. + */ +void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) +{ + int ret; + int kvm_fd; + + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", + KVM_DEV_PATH, kvm_fd, errno); + + ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); + TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n", + ret, errno); + + close(kvm_fd); +} + +/* Locate a cpuid entry. + * + * Input Args: + * cpuid: The cpuid. + * function: The function of the cpuid entry to find. + * + * Output Args: None + * + * Return: A pointer to the cpuid entry. Never returns NULL. + */ +struct kvm_cpuid_entry2 * +find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, + uint32_t index) +{ + struct kvm_cpuid_entry2 *entry = NULL; + int i; + + for (i = 0; i < cpuid->nent; i++) { + if (cpuid->entries[i].function == function && + cpuid->entries[i].index == index) { + entry = &cpuid->entries[i]; + break; + } + } + + TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).", + function, index); + return entry; +} + +/* VM Userspace Memory Region Add + * + * Input Args: + * vm - Virtual Machine + * backing_src - Storage source for this region. + * NULL to use anonymous memory. + * guest_paddr - Starting guest physical address + * slot - KVM region slot + * npages - Number of physical pages + * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES) + * + * Output Args: None + * + * Return: None + * + * Allocates a memory area of the number of pages specified by npages + * and maps it to the VM specified by vm, at a starting physical address + * given by guest_paddr. The region is created with a KVM region slot + * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The + * region is created with the flags given by flags. + */ +void vm_userspace_mem_region_add(struct kvm_vm *vm, + enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags) +{ + int ret; + unsigned long pmem_size = 0; + struct userspace_mem_region *region; + size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size; + + TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " + "address not on a page boundary.\n" + " guest_paddr: 0x%lx vm->page_size: 0x%x", + guest_paddr, vm->page_size); + TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1) + <= vm->max_gfn, "Physical range beyond maximum " + "supported physical address,\n" + " guest_paddr: 0x%lx npages: 0x%lx\n" + " vm->max_gfn: 0x%lx vm->page_size: 0x%x", + guest_paddr, npages, vm->max_gfn, vm->page_size); + + /* Confirm a mem region with an overlapping address doesn't + * already exist. + */ + region = (struct userspace_mem_region *) userspace_mem_region_find( + vm, guest_paddr, guest_paddr + npages * vm->page_size); + if (region != NULL) + TEST_ASSERT(false, "overlapping userspace_mem_region already " + "exists\n" + " requested guest_paddr: 0x%lx npages: 0x%lx " + "page_size: 0x%x\n" + " existing guest_paddr: 0x%lx size: 0x%lx", + guest_paddr, npages, vm->page_size, + (uint64_t) region->region.guest_phys_addr, + (uint64_t) region->region.memory_size); + + /* Confirm no region with the requested slot already exists. */ + for (region = vm->userspace_mem_region_head; region; + region = region->next) { + if (region->region.slot == slot) + break; + if ((guest_paddr <= (region->region.guest_phys_addr + + region->region.memory_size)) + && ((guest_paddr + npages * vm->page_size) + >= region->region.guest_phys_addr)) + break; + } + if (region != NULL) + TEST_ASSERT(false, "A mem region with the requested slot " + "or overlapping physical memory range already exists.\n" + " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" + " existing slot: %u paddr: 0x%lx size: 0x%lx", + slot, guest_paddr, npages, + region->region.slot, + (uint64_t) region->region.guest_phys_addr, + (uint64_t) region->region.memory_size); + + /* Allocate and initialize new mem region structure. */ + region = calloc(1, sizeof(*region)); + TEST_ASSERT(region != NULL, "Insufficient Memory"); + region->mmap_size = npages * vm->page_size; + + /* Enough memory to align up to a huge page. */ + if (src_type == VM_MEM_SRC_ANONYMOUS_THP) + region->mmap_size += huge_page_size; + region->mmap_start = mmap(NULL, region->mmap_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS + | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0), + -1, 0); + TEST_ASSERT(region->mmap_start != MAP_FAILED, + "test_malloc failed, mmap_start: %p errno: %i", + region->mmap_start, errno); + + /* Align THP allocation up to start of a huge page. */ + region->host_mem = align(region->mmap_start, + src_type == VM_MEM_SRC_ANONYMOUS_THP ? huge_page_size : 1); + + /* As needed perform madvise */ + if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) { + ret = madvise(region->host_mem, npages * vm->page_size, + src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); + TEST_ASSERT(ret == 0, "madvise failed,\n" + " addr: %p\n" + " length: 0x%lx\n" + " src_type: %x", + region->host_mem, npages * vm->page_size, src_type); + } + + region->unused_phy_pages = sparsebit_alloc(); + sparsebit_set_num(region->unused_phy_pages, + guest_paddr >> vm->page_shift, npages); + region->region.slot = slot; + region->region.flags = flags; + region->region.guest_phys_addr = guest_paddr; + region->region.memory_size = npages * vm->page_size; + region->region.userspace_addr = (uintptr_t) region->host_mem; + ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" + " rc: %i errno: %i\n" + " slot: %u flags: 0x%x\n" + " guest_phys_addr: 0x%lx size: 0x%lx", + ret, errno, slot, flags, + guest_paddr, (uint64_t) region->region.memory_size); + + /* Add to linked-list of memory regions. */ + if (vm->userspace_mem_region_head) + vm->userspace_mem_region_head->prev = region; + region->next = vm->userspace_mem_region_head; + vm->userspace_mem_region_head = region; +} + +/* Memslot to region + * + * Input Args: + * vm - Virtual Machine + * memslot - KVM memory slot ID + * + * Output Args: None + * + * Return: + * Pointer to memory region structure that describe memory region + * using kvm memory slot ID given by memslot. TEST_ASSERT failure + * on error (e.g. currently no memory region using memslot as a KVM + * memory slot ID). + */ +static struct userspace_mem_region *memslot2region(struct kvm_vm *vm, + uint32_t memslot) +{ + struct userspace_mem_region *region; + + for (region = vm->userspace_mem_region_head; region; + region = region->next) { + if (region->region.slot == memslot) + break; + } + if (region == NULL) { + fprintf(stderr, "No mem region with the requested slot found,\n" + " requested slot: %u\n", memslot); + fputs("---- vm dump ----\n", stderr); + vm_dump(stderr, vm, 2); + TEST_ASSERT(false, "Mem region not found"); + } + + return region; +} + +/* VM Memory Region Flags Set + * + * Input Args: + * vm - Virtual Machine + * flags - Starting guest physical address + * + * Output Args: None + * + * Return: None + * + * Sets the flags of the memory region specified by the value of slot, + * to the values given by flags. + */ +void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) +{ + int ret; + struct userspace_mem_region *region; + + /* Locate memory region. */ + region = memslot2region(vm, slot); + + region->region.flags = flags; + + ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); + + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" + " rc: %i errno: %i slot: %u flags: 0x%x", + ret, errno, slot, flags); +} + +/* VCPU mmap Size + * + * Input Args: None + * + * Output Args: None + * + * Return: + * Size of VCPU state + * + * Returns the size of the structure pointed to by the return value + * of vcpu_state(). + */ +static int vcpu_mmap_sz(void) +{ + int dev_fd, ret; + + dev_fd = open(KVM_DEV_PATH, O_RDONLY); + TEST_ASSERT(dev_fd >= 0, "%s open %s failed, rc: %i errno: %i", + __func__, KVM_DEV_PATH, dev_fd, errno); + + ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); + TEST_ASSERT(ret >= sizeof(struct kvm_run), + "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i", + __func__, ret, errno); + + close(dev_fd); + + return ret; +} + +/* VM VCPU Add + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: None + * + * Return: None + * + * Creates and adds to the VM specified by vm and virtual CPU with + * the ID given by vcpuid. + */ +void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu; + + /* Confirm a vcpu with the specified id doesn't already exist. */ + vcpu = vcpu_find(vm, vcpuid); + if (vcpu != NULL) + TEST_ASSERT(false, "vcpu with the specified id " + "already exists,\n" + " requested vcpuid: %u\n" + " existing vcpuid: %u state: %p", + vcpuid, vcpu->id, vcpu->state); + + /* Allocate and initialize new vcpu structure. */ + vcpu = calloc(1, sizeof(*vcpu)); + TEST_ASSERT(vcpu != NULL, "Insufficient Memory"); + vcpu->id = vcpuid; + vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid); + TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i", + vcpu->fd, errno); + + TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size " + "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", + vcpu_mmap_sz(), sizeof(*vcpu->state)); + vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state), + PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); + TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, " + "vcpu id: %u errno: %i", vcpuid, errno); + + /* Add to linked-list of VCPUs. */ + if (vm->vcpu_head) + vm->vcpu_head->prev = vcpu; + vcpu->next = vm->vcpu_head; + vm->vcpu_head = vcpu; + + vcpu_setup(vm, vcpuid); +} + +/* VM Virtual Address Unused Gap + * + * Input Args: + * vm - Virtual Machine + * sz - Size (bytes) + * vaddr_min - Minimum Virtual Address + * + * Output Args: None + * + * Return: + * Lowest virtual address at or below vaddr_min, with at least + * sz unused bytes. TEST_ASSERT failure if no area of at least + * size sz is available. + * + * Within the VM specified by vm, locates the lowest starting virtual + * address >= vaddr_min, that has at least sz unallocated bytes. A + * TEST_ASSERT failure occurs for invalid input or no area of at least + * sz unallocated bytes >= vaddr_min is available. + */ +static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, + vm_vaddr_t vaddr_min) +{ + uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; + + /* Determine lowest permitted virtual page index. */ + uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift; + if ((pgidx_start * vm->page_size) < vaddr_min) + goto no_va_found; + + /* Loop over section with enough valid virtual page indexes. */ + if (!sparsebit_is_set_num(vm->vpages_valid, + pgidx_start, pages)) + pgidx_start = sparsebit_next_set_num(vm->vpages_valid, + pgidx_start, pages); + do { + /* + * Are there enough unused virtual pages available at + * the currently proposed starting virtual page index. + * If not, adjust proposed starting index to next + * possible. + */ + if (sparsebit_is_clear_num(vm->vpages_mapped, + pgidx_start, pages)) + goto va_found; + pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped, + pgidx_start, pages); + if (pgidx_start == 0) + goto no_va_found; + + /* + * If needed, adjust proposed starting virtual address, + * to next range of valid virtual addresses. + */ + if (!sparsebit_is_set_num(vm->vpages_valid, + pgidx_start, pages)) { + pgidx_start = sparsebit_next_set_num( + vm->vpages_valid, pgidx_start, pages); + if (pgidx_start == 0) + goto no_va_found; + } + } while (pgidx_start != 0); + +no_va_found: + TEST_ASSERT(false, "No vaddr of specified pages available, " + "pages: 0x%lx", pages); + + /* NOT REACHED */ + return -1; + +va_found: + TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid, + pgidx_start, pages), + "Unexpected, invalid virtual page index range,\n" + " pgidx_start: 0x%lx\n" + " pages: 0x%lx", + pgidx_start, pages); + TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped, + pgidx_start, pages), + "Unexpected, pages already mapped,\n" + " pgidx_start: 0x%lx\n" + " pages: 0x%lx", + pgidx_start, pages); + + return pgidx_start * vm->page_size; +} + +/* VM Virtual Address Allocate + * + * Input Args: + * vm - Virtual Machine + * sz - Size in bytes + * vaddr_min - Minimum starting virtual address + * data_memslot - Memory region slot for data pages + * pgd_memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: + * Starting guest virtual address + * + * Allocates at least sz bytes within the virtual address space of the vm + * given by vm. The allocated bytes are mapped to a virtual address >= + * the address given by vaddr_min. Note that each allocation uses a + * a unique set of pages, with the minimum real allocation being at least + * a page. + */ +vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, + uint32_t data_memslot, uint32_t pgd_memslot) +{ + uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); + + virt_pgd_alloc(vm, pgd_memslot); + + /* Find an unused range of virtual page addresses of at least + * pages in length. + */ + vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min); + + /* Map the virtual pages. */ + for (vm_vaddr_t vaddr = vaddr_start; pages > 0; + pages--, vaddr += vm->page_size) { + vm_paddr_t paddr; + + paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot); + + virt_pg_map(vm, vaddr, paddr, pgd_memslot); + + sparsebit_set(vm->vpages_mapped, + vaddr >> vm->page_shift); + } + + return vaddr_start; +} + +/* Address VM Physical to Host Virtual + * + * Input Args: + * vm - Virtual Machine + * gpa - VM physical address + * + * Output Args: None + * + * Return: + * Equivalent host virtual address + * + * Locates the memory region containing the VM physical address given + * by gpa, within the VM given by vm. When found, the host virtual + * address providing the memory to the vm physical address is returned. + * A TEST_ASSERT failure occurs if no region containing gpa exists. + */ +void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) +{ + struct userspace_mem_region *region; + for (region = vm->userspace_mem_region_head; region; + region = region->next) { + if ((gpa >= region->region.guest_phys_addr) + && (gpa <= (region->region.guest_phys_addr + + region->region.memory_size - 1))) + return (void *) ((uintptr_t) region->host_mem + + (gpa - region->region.guest_phys_addr)); + } + + TEST_ASSERT(false, "No vm physical memory at 0x%lx", gpa); + return NULL; +} + +/* Address Host Virtual to VM Physical + * + * Input Args: + * vm - Virtual Machine + * hva - Host virtual address + * + * Output Args: None + * + * Return: + * Equivalent VM physical address + * + * Locates the memory region containing the host virtual address given + * by hva, within the VM given by vm. When found, the equivalent + * VM physical address is returned. A TEST_ASSERT failure occurs if no + * region containing hva exists. + */ +vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) +{ + struct userspace_mem_region *region; + for (region = vm->userspace_mem_region_head; region; + region = region->next) { + if ((hva >= region->host_mem) + && (hva <= (region->host_mem + + region->region.memory_size - 1))) + return (vm_paddr_t) ((uintptr_t) + region->region.guest_phys_addr + + (hva - (uintptr_t) region->host_mem)); + } + + TEST_ASSERT(false, "No mapping to a guest physical address, " + "hva: %p", hva); + return -1; +} + +/* VM Create IRQ Chip + * + * Input Args: + * vm - Virtual Machine + * + * Output Args: None + * + * Return: None + * + * Creates an interrupt controller chip for the VM specified by vm. + */ +void vm_create_irqchip(struct kvm_vm *vm) +{ + int ret; + + ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0); + TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, " + "rc: %i errno: %i", ret, errno); +} + +/* VM VCPU State + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: None + * + * Return: + * Pointer to structure that describes the state of the VCPU. + * + * Locates and returns a pointer to a structure that describes the + * state of the VCPU with the given vcpuid. + */ +struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + return vcpu->state; +} + +/* VM VCPU Run + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: None + * + * Return: None + * + * Switch to executing the code for the VCPU given by vcpuid, within the VM + * given by vm. + */ +void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) +{ + int ret = _vcpu_run(vm, vcpuid); + TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, " + "rc: %i errno: %i", ret, errno); +} + +int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int rc; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + do { + rc = ioctl(vcpu->fd, KVM_RUN, NULL); + } while (rc == -1 && errno == EINTR); + return rc; +} + +/* VM VCPU Set MP State + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * mp_state - mp_state to be set + * + * Output Args: None + * + * Return: None + * + * Sets the MP state of the VCPU given by vcpuid, to the state given + * by mp_state. + */ +void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_mp_state *mp_state) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state); + TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, " + "rc: %i errno: %i", ret, errno); +} + +/* VM VCPU Regs Get + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: + * regs - current state of VCPU regs + * + * Return: None + * + * Obtains the current register state for the VCPU specified by vcpuid + * and stores it at the location given by regs. + */ +void vcpu_regs_get(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_regs *regs) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + /* Get the regs. */ + ret = ioctl(vcpu->fd, KVM_GET_REGS, regs); + TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i", + ret, errno); +} + +/* VM VCPU Regs Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * regs - Values to set VCPU regs to + * + * Output Args: None + * + * Return: None + * + * Sets the regs of the VCPU specified by vcpuid to the values + * given by regs. + */ +void vcpu_regs_set(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_regs *regs) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + /* Set the regs. */ + ret = ioctl(vcpu->fd, KVM_SET_REGS, regs); + TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i", + ret, errno); +} + +void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_vcpu_events *events) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + /* Get the regs. */ + ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events); + TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i", + ret, errno); +} + +void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_vcpu_events *events) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + /* Set the regs. */ + ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events); + TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i", + ret, errno); +} + +/* VM VCPU Args Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * num - number of arguments + * ... - arguments, each of type uint64_t + * + * Output Args: None + * + * Return: None + * + * Sets the first num function input arguments to the values + * given as variable args. Each of the variable args is expected to + * be of type uint64_t. + */ +void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) +{ + va_list ap; + struct kvm_regs regs; + + TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" + " num: %u\n", + num); + + va_start(ap, num); + vcpu_regs_get(vm, vcpuid, ®s); + + if (num >= 1) + regs.rdi = va_arg(ap, uint64_t); + + if (num >= 2) + regs.rsi = va_arg(ap, uint64_t); + + if (num >= 3) + regs.rdx = va_arg(ap, uint64_t); + + if (num >= 4) + regs.rcx = va_arg(ap, uint64_t); + + if (num >= 5) + regs.r8 = va_arg(ap, uint64_t); + + if (num >= 6) + regs.r9 = va_arg(ap, uint64_t); + + vcpu_regs_set(vm, vcpuid, ®s); + va_end(ap); +} + +/* VM VCPU System Regs Get + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: + * sregs - current state of VCPU system regs + * + * Return: None + * + * Obtains the current system register state for the VCPU specified by + * vcpuid and stores it at the location given by sregs. + */ +void vcpu_sregs_get(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_sregs *sregs) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + /* Get the regs. */ + /* Get the regs. */ + ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs); + TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i", + ret, errno); +} + +/* VM VCPU System Regs Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * sregs - Values to set VCPU system regs to + * + * Output Args: None + * + * Return: None + * + * Sets the system regs of the VCPU specified by vcpuid to the values + * given by sregs. + */ +void vcpu_sregs_set(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_sregs *sregs) +{ + int ret = _vcpu_sregs_set(vm, vcpuid, sregs); + TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, " + "rc: %i errno: %i", ret, errno); +} + +int _vcpu_sregs_set(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_sregs *sregs) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + /* Get the regs. */ + return ioctl(vcpu->fd, KVM_SET_SREGS, sregs); +} + +/* VCPU Ioctl + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * cmd - Ioctl number + * arg - Argument to pass to the ioctl + * + * Return: None + * + * Issues an arbitrary ioctl on a VCPU fd. + */ +void vcpu_ioctl(struct kvm_vm *vm, + uint32_t vcpuid, unsigned long cmd, void *arg) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + ret = ioctl(vcpu->fd, cmd, arg); + TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)", + cmd, ret, errno, strerror(errno)); +} + +/* VM Ioctl + * + * Input Args: + * vm - Virtual Machine + * cmd - Ioctl number + * arg - Argument to pass to the ioctl + * + * Return: None + * + * Issues an arbitrary ioctl on a VM fd. + */ +void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) +{ + int ret; + + ret = ioctl(vm->fd, cmd, arg); + TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)", + cmd, ret, errno, strerror(errno)); +} + +/* VM Dump + * + * Input Args: + * vm - Virtual Machine + * indent - Left margin indent amount + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the current state of the VM given by vm, to the FILE stream + * given by stream. + */ +void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + struct userspace_mem_region *region; + struct vcpu *vcpu; + + fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode); + fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd); + fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size); + fprintf(stream, "%*sMem Regions:\n", indent, ""); + for (region = vm->userspace_mem_region_head; region; + region = region->next) { + fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx " + "host_virt: %p\n", indent + 2, "", + (uint64_t) region->region.guest_phys_addr, + (uint64_t) region->region.memory_size, + region->host_mem); + fprintf(stream, "%*sunused_phy_pages: ", indent + 2, ""); + sparsebit_dump(stream, region->unused_phy_pages, 0); + } + fprintf(stream, "%*sMapped Virtual Pages:\n", indent, ""); + sparsebit_dump(stream, vm->vpages_mapped, indent + 2); + fprintf(stream, "%*spgd_created: %u\n", indent, "", + vm->pgd_created); + if (vm->pgd_created) { + fprintf(stream, "%*sVirtual Translation Tables:\n", + indent + 2, ""); + virt_dump(stream, vm, indent + 4); + } + fprintf(stream, "%*sVCPUs:\n", indent, ""); + for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next) + vcpu_dump(stream, vm, vcpu->id, indent + 2); +} + +/* VM VCPU Dump + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * indent - Left margin indent amount + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the current state of the VCPU specified by vcpuid, within the VM + * given by vm, to the FILE stream given by stream. + */ +void vcpu_dump(FILE *stream, struct kvm_vm *vm, + uint32_t vcpuid, uint8_t indent) +{ + struct kvm_regs regs; + struct kvm_sregs sregs; + + fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid); + + fprintf(stream, "%*sregs:\n", indent + 2, ""); + vcpu_regs_get(vm, vcpuid, ®s); + regs_dump(stream, ®s, indent + 4); + + fprintf(stream, "%*ssregs:\n", indent + 2, ""); + vcpu_sregs_get(vm, vcpuid, &sregs); + sregs_dump(stream, &sregs, indent + 4); +} + +/* Known KVM exit reasons */ +static struct exit_reason { + unsigned int reason; + const char *name; +} exit_reasons_known[] = { + {KVM_EXIT_UNKNOWN, "UNKNOWN"}, + {KVM_EXIT_EXCEPTION, "EXCEPTION"}, + {KVM_EXIT_IO, "IO"}, + {KVM_EXIT_HYPERCALL, "HYPERCALL"}, + {KVM_EXIT_DEBUG, "DEBUG"}, + {KVM_EXIT_HLT, "HLT"}, + {KVM_EXIT_MMIO, "MMIO"}, + {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"}, + {KVM_EXIT_SHUTDOWN, "SHUTDOWN"}, + {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"}, + {KVM_EXIT_INTR, "INTR"}, + {KVM_EXIT_SET_TPR, "SET_TPR"}, + {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"}, + {KVM_EXIT_S390_SIEIC, "S390_SIEIC"}, + {KVM_EXIT_S390_RESET, "S390_RESET"}, + {KVM_EXIT_DCR, "DCR"}, + {KVM_EXIT_NMI, "NMI"}, + {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"}, + {KVM_EXIT_OSI, "OSI"}, + {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"}, +#ifdef KVM_EXIT_MEMORY_NOT_PRESENT + {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"}, +#endif +}; + +/* Exit Reason String + * + * Input Args: + * exit_reason - Exit reason + * + * Output Args: None + * + * Return: + * Constant string pointer describing the exit reason. + * + * Locates and returns a constant string that describes the KVM exit + * reason given by exit_reason. If no such string is found, a constant + * string of "Unknown" is returned. + */ +const char *exit_reason_str(unsigned int exit_reason) +{ + unsigned int n1; + + for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) { + if (exit_reason == exit_reasons_known[n1].reason) + return exit_reasons_known[n1].name; + } + + return "Unknown"; +} + +/* Physical Page Allocate + * + * Input Args: + * vm - Virtual Machine + * paddr_min - Physical address minimum + * memslot - Memory region to allocate page from + * + * Output Args: None + * + * Return: + * Starting physical address + * + * Within the VM specified by vm, locates an available physical page + * at or above paddr_min. If found, the page is marked as in use + * and its address is returned. A TEST_ASSERT failure occurs if no + * page is available at or above paddr_min. + */ +vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, + vm_paddr_t paddr_min, uint32_t memslot) +{ + struct userspace_mem_region *region; + sparsebit_idx_t pg; + + TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " + "not divisable by page size.\n" + " paddr_min: 0x%lx page_size: 0x%x", + paddr_min, vm->page_size); + + /* Locate memory region. */ + region = memslot2region(vm, memslot); + + /* Locate next available physical page at or above paddr_min. */ + pg = paddr_min >> vm->page_shift; + + if (!sparsebit_is_set(region->unused_phy_pages, pg)) { + pg = sparsebit_next_set(region->unused_phy_pages, pg); + if (pg == 0) { + fprintf(stderr, "No guest physical page available, " + "paddr_min: 0x%lx page_size: 0x%x memslot: %u", + paddr_min, vm->page_size, memslot); + fputs("---- vm dump ----\n", stderr); + vm_dump(stderr, vm, 2); + abort(); + } + } + + /* Specify page as in use and return its address. */ + sparsebit_clear(region->unused_phy_pages, pg); + + return pg * vm->page_size; +} + +/* Address Guest Virtual to Host Virtual + * + * Input Args: + * vm - Virtual Machine + * gva - VM virtual address + * + * Output Args: None + * + * Return: + * Equivalent host virtual address + */ +void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva) +{ + return addr_gpa2hva(vm, addr_gva2gpa(vm, gva)); +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h new file mode 100644 index 000000000000..a0bd1980c81c --- /dev/null +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -0,0 +1,67 @@ +/* + * tools/testing/selftests/kvm/lib/kvm_util.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#ifndef KVM_UTIL_INTERNAL_H +#define KVM_UTIL_INTERNAL_H 1 + +#include "sparsebit.h" + +#ifndef BITS_PER_BYTE +#define BITS_PER_BYTE 8 +#endif + +#ifndef BITS_PER_LONG +#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long)) +#endif + +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) + +/* Concrete definition of struct kvm_vm. */ +struct userspace_mem_region { + struct userspace_mem_region *next, *prev; + struct kvm_userspace_memory_region region; + struct sparsebit *unused_phy_pages; + int fd; + off_t offset; + void *host_mem; + void *mmap_start; + size_t mmap_size; +}; + +struct vcpu { + struct vcpu *next, *prev; + uint32_t id; + int fd; + struct kvm_run *state; +}; + +struct kvm_vm { + int mode; + int fd; + unsigned int page_size; + unsigned int page_shift; + uint64_t max_gfn; + struct vcpu *vcpu_head; + struct userspace_mem_region *userspace_mem_region_head; + struct sparsebit *vpages_valid; + struct sparsebit *vpages_mapped; + bool pgd_created; + vm_paddr_t pgd; +}; + +struct vcpu *vcpu_find(struct kvm_vm *vm, + uint32_t vcpuid); +void vcpu_setup(struct kvm_vm *vm, int vcpuid); +void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); +void regs_dump(FILE *stream, struct kvm_regs *regs, + uint8_t indent); +void sregs_dump(FILE *stream, struct kvm_sregs *sregs, + uint8_t indent); + +#endif diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c new file mode 100644 index 000000000000..0c5cf3e0cb6f --- /dev/null +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -0,0 +1,2087 @@ +/* + * Sparse bit array + * + * Copyright (C) 2018, Google LLC. + * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver) + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * This library provides functions to support a memory efficient bit array, + * with an index size of 2^64. A sparsebit array is allocated through + * the use sparsebit_alloc() and free'd via sparsebit_free(), + * such as in the following: + * + * struct sparsebit *s; + * s = sparsebit_alloc(); + * sparsebit_free(&s); + * + * The struct sparsebit type resolves down to a struct sparsebit. + * Note that, sparsebit_free() takes a pointer to the sparsebit + * structure. This is so that sparsebit_free() is able to poison + * the pointer (e.g. set it to NULL) to the struct sparsebit before + * returning to the caller. + * + * Between the return of sparsebit_alloc() and the call of + * sparsebit_free(), there are multiple query and modifying operations + * that can be performed on the allocated sparsebit array. All of + * these operations take as a parameter the value returned from + * sparsebit_alloc() and most also take a bit index. Frequently + * used routines include: + * + * ---- Query Operations + * sparsebit_is_set(s, idx) + * sparsebit_is_clear(s, idx) + * sparsebit_any_set(s) + * sparsebit_first_set(s) + * sparsebit_next_set(s, prev_idx) + * + * ---- Modifying Operations + * sparsebit_set(s, idx) + * sparsebit_clear(s, idx) + * sparsebit_set_num(s, idx, num); + * sparsebit_clear_num(s, idx, num); + * + * A common operation, is to itterate over all the bits set in a test + * sparsebit array. This can be done via code with the following structure: + * + * sparsebit_idx_t idx; + * if (sparsebit_any_set(s)) { + * idx = sparsebit_first_set(s); + * do { + * ... + * idx = sparsebit_next_set(s, idx); + * } while (idx != 0); + * } + * + * The index of the first bit set needs to be obtained via + * sparsebit_first_set(), because sparsebit_next_set(), needs + * the index of the previously set. The sparsebit_idx_t type is + * unsigned, so there is no previous index before 0 that is available. + * Also, the call to sparsebit_first_set() is not made unless there + * is at least 1 bit in the array set. This is because sparsebit_first_set() + * aborts if sparsebit_first_set() is called with no bits set. + * It is the callers responsibility to assure that the + * sparsebit array has at least a single bit set before calling + * sparsebit_first_set(). + * + * ==== Implementation Overview ==== + * For the most part the internal implementation of sparsebit is + * opaque to the caller. One important implementation detail that the + * caller may need to be aware of is the spatial complexity of the + * implementation. This implementation of a sparsebit array is not + * only sparse, in that it uses memory proportional to the number of bits + * set. It is also efficient in memory usage when most of the bits are + * set. + * + * At a high-level the state of the bit settings are maintained through + * the use of a binary-search tree, where each node contains at least + * the following members: + * + * typedef uint64_t sparsebit_idx_t; + * typedef uint64_t sparsebit_num_t; + * + * sparsebit_idx_t idx; + * uint32_t mask; + * sparsebit_num_t num_after; + * + * The idx member contains the bit index of the first bit described by this + * node, while the mask member stores the setting of the first 32-bits. + * The setting of the bit at idx + n, where 0 <= n < 32, is located in the + * mask member at 1 << n. + * + * Nodes are sorted by idx and the bits described by two nodes will never + * overlap. The idx member is always aligned to the mask size, i.e. a + * multiple of 32. + * + * Beyond a typical implementation, the nodes in this implementation also + * contains a member named num_after. The num_after member holds the + * number of bits immediately after the mask bits that are contiguously set. + * The use of the num_after member allows this implementation to efficiently + * represent cases where most bits are set. For example, the case of all + * but the last two bits set, is represented by the following two nodes: + * + * node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0 + * node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0 + * + * ==== Invariants ==== + * This implementation usses the following invariants: + * + * + Node are only used to represent bits that are set. + * Nodes with a mask of 0 and num_after of 0 are not allowed. + * + * + Sum of bits set in all the nodes is equal to the value of + * the struct sparsebit_pvt num_set member. + * + * + The setting of at least one bit is always described in a nodes + * mask (mask >= 1). + * + * + A node with all mask bits set only occurs when the last bit + * described by the previous node is not equal to this nodes + * starting index - 1. All such occurences of this condition are + * avoided by moving the setting of the nodes mask bits into + * the previous nodes num_after setting. + * + * + Node starting index is evenly divisable by the number of bits + * within a nodes mask member. + * + * + Nodes never represent a range of bits that wrap around the + * highest supported index. + * + * (idx + MASK_BITS + num_after - 1) <= ((sparsebit_idx_t) 0) - 1) + * + * As a consequence of the above, the num_after member of a node + * will always be <=: + * + * maximum_index - nodes_starting_index - number_of_mask_bits + * + * + Nodes within the binary search tree are sorted based on each + * nodes starting index. + * + * + The range of bits described by any two nodes do not overlap. The + * range of bits described by a single node is: + * + * start: node->idx + * end (inclusive): node->idx + MASK_BITS + node->num_after - 1; + * + * Note, at times these invariants are temporarily violated for a + * specific portion of the code. For example, when setting a mask + * bit, there is a small delay between when the mask bit is set and the + * value in the struct sparsebit_pvt num_set member is updated. Other + * temporary violations occur when node_split() is called with a specified + * index and assures that a node where its mask represents the bit + * at the specified index exists. At times to do this node_split() + * must split an existing node into two nodes or create a node that + * has no bits set. Such temporary violations must be corrected before + * returning to the caller. These corrections are typically performed + * by the local function node_reduce(). + */ + +#include "test_util.h" +#include "sparsebit.h" +#include <limits.h> +#include <assert.h> + +#define DUMP_LINE_MAX 100 /* Does not include indent amount */ + +typedef uint32_t mask_t; +#define MASK_BITS (sizeof(mask_t) * CHAR_BIT) + +struct node { + struct node *parent; + struct node *left; + struct node *right; + sparsebit_idx_t idx; /* index of least-significant bit in mask */ + sparsebit_num_t num_after; /* num contiguously set after mask */ + mask_t mask; +}; + +struct sparsebit { + /* + * Points to root node of the binary search + * tree. Equal to NULL when no bits are set in + * the entire sparsebit array. + */ + struct node *root; + + /* + * A redundant count of the total number of bits set. Used for + * diagnostic purposes and to change the time complexity of + * sparsebit_num_set() from O(n) to O(1). + * Note: Due to overflow, a value of 0 means none or all set. + */ + sparsebit_num_t num_set; +}; + +/* Returns the number of set bits described by the settings + * of the node pointed to by nodep. + */ +static sparsebit_num_t node_num_set(struct node *nodep) +{ + return nodep->num_after + __builtin_popcount(nodep->mask); +} + +/* Returns a pointer to the node that describes the + * lowest bit index. + */ +static struct node *node_first(struct sparsebit *s) +{ + struct node *nodep; + + for (nodep = s->root; nodep && nodep->left; nodep = nodep->left) + ; + + return nodep; +} + +/* Returns a pointer to the node that describes the + * lowest bit index > the index of the node pointed to by np. + * Returns NULL if no node with a higher index exists. + */ +static struct node *node_next(struct sparsebit *s, struct node *np) +{ + struct node *nodep = np; + + /* + * If current node has a right child, next node is the left-most + * of the right child. + */ + if (nodep->right) { + for (nodep = nodep->right; nodep->left; nodep = nodep->left) + ; + return nodep; + } + + /* + * No right child. Go up until node is left child of a parent. + * That parent is then the next node. + */ + while (nodep->parent && nodep == nodep->parent->right) + nodep = nodep->parent; + + return nodep->parent; +} + +/* Searches for and returns a pointer to the node that describes the + * highest index < the index of the node pointed to by np. + * Returns NULL if no node with a lower index exists. + */ +static struct node *node_prev(struct sparsebit *s, struct node *np) +{ + struct node *nodep = np; + + /* + * If current node has a left child, next node is the right-most + * of the left child. + */ + if (nodep->left) { + for (nodep = nodep->left; nodep->right; nodep = nodep->right) + ; + return (struct node *) nodep; + } + + /* + * No left child. Go up until node is right child of a parent. + * That parent is then the next node. + */ + while (nodep->parent && nodep == nodep->parent->left) + nodep = nodep->parent; + + return (struct node *) nodep->parent; +} + + +/* Allocates space to hold a copy of the node sub-tree pointed to by + * subtree and duplicates the bit settings to the newly allocated nodes. + * Returns the newly allocated copy of subtree. + */ +static struct node *node_copy_subtree(struct node *subtree) +{ + struct node *root; + + /* Duplicate the node at the root of the subtree */ + root = calloc(1, sizeof(*root)); + if (!root) { + perror("calloc"); + abort(); + } + + root->idx = subtree->idx; + root->mask = subtree->mask; + root->num_after = subtree->num_after; + + /* As needed, recursively duplicate the left and right subtrees */ + if (subtree->left) { + root->left = node_copy_subtree(subtree->left); + root->left->parent = root; + } + + if (subtree->right) { + root->right = node_copy_subtree(subtree->right); + root->right->parent = root; + } + + return root; +} + +/* Searches for and returns a pointer to the node that describes the setting + * of the bit given by idx. A node describes the setting of a bit if its + * index is within the bits described by the mask bits or the number of + * contiguous bits set after the mask. Returns NULL if there is no such node. + */ +static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx) +{ + struct node *nodep; + + /* Find the node that describes the setting of the bit at idx */ + for (nodep = s->root; nodep; + nodep = nodep->idx > idx ? nodep->left : nodep->right) { + if (idx >= nodep->idx && + idx <= nodep->idx + MASK_BITS + nodep->num_after - 1) + break; + } + + return nodep; +} + +/* Entry Requirements: + * + A node that describes the setting of idx is not already present. + * + * Adds a new node to describe the setting of the bit at the index given + * by idx. Returns a pointer to the newly added node. + * + * TODO(lhuemill): Degenerate cases causes the tree to get unbalanced. + */ +static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx) +{ + struct node *nodep, *parentp, *prev; + + /* Allocate and initialize the new node. */ + nodep = calloc(1, sizeof(*nodep)); + if (!nodep) { + perror("calloc"); + abort(); + } + + nodep->idx = idx & -MASK_BITS; + + /* If no nodes, set it up as the root node. */ + if (!s->root) { + s->root = nodep; + return nodep; + } + + /* + * Find the parent where the new node should be attached + * and add the node there. + */ + parentp = s->root; + while (true) { + if (idx < parentp->idx) { + if (!parentp->left) { + parentp->left = nodep; + nodep->parent = parentp; + break; + } + parentp = parentp->left; + } else { + assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1); + if (!parentp->right) { + parentp->right = nodep; + nodep->parent = parentp; + break; + } + parentp = parentp->right; + } + } + + /* + * Does num_after bits of previous node overlap with the mask + * of the new node? If so set the bits in the new nodes mask + * and reduce the previous nodes num_after. + */ + prev = node_prev(s, nodep); + while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) { + unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1) + - nodep->idx; + assert(prev->num_after > 0); + assert(n1 < MASK_BITS); + assert(!(nodep->mask & (1 << n1))); + nodep->mask |= (1 << n1); + prev->num_after--; + } + + return nodep; +} + +/* Returns whether all the bits in the sparsebit array are set. */ +bool sparsebit_all_set(struct sparsebit *s) +{ + /* + * If any nodes there must be at least one bit set. Only case + * where a bit is set and total num set is 0, is when all bits + * are set. + */ + return s->root && s->num_set == 0; +} + +/* Clears all bits described by the node pointed to by nodep, then + * removes the node. + */ +static void node_rm(struct sparsebit *s, struct node *nodep) +{ + struct node *tmp; + sparsebit_num_t num_set; + + num_set = node_num_set(nodep); + assert(s->num_set >= num_set || sparsebit_all_set(s)); + s->num_set -= node_num_set(nodep); + + /* Have both left and right child */ + if (nodep->left && nodep->right) { + /* + * Move left children to the leftmost leaf node + * of the right child. + */ + for (tmp = nodep->right; tmp->left; tmp = tmp->left) + ; + tmp->left = nodep->left; + nodep->left = NULL; + tmp->left->parent = tmp; + } + + /* Left only child */ + if (nodep->left) { + if (!nodep->parent) { + s->root = nodep->left; + nodep->left->parent = NULL; + } else { + nodep->left->parent = nodep->parent; + if (nodep == nodep->parent->left) + nodep->parent->left = nodep->left; + else { + assert(nodep == nodep->parent->right); + nodep->parent->right = nodep->left; + } + } + + nodep->parent = nodep->left = nodep->right = NULL; + free(nodep); + + return; + } + + + /* Right only child */ + if (nodep->right) { + if (!nodep->parent) { + s->root = nodep->right; + nodep->right->parent = NULL; + } else { + nodep->right->parent = nodep->parent; + if (nodep == nodep->parent->left) + nodep->parent->left = nodep->right; + else { + assert(nodep == nodep->parent->right); + nodep->parent->right = nodep->right; + } + } + + nodep->parent = nodep->left = nodep->right = NULL; + free(nodep); + + return; + } + + /* Leaf Node */ + if (!nodep->parent) { + s->root = NULL; + } else { + if (nodep->parent->left == nodep) + nodep->parent->left = NULL; + else { + assert(nodep == nodep->parent->right); + nodep->parent->right = NULL; + } + } + + nodep->parent = nodep->left = nodep->right = NULL; + free(nodep); + + return; +} + +/* Splits the node containing the bit at idx so that there is a node + * that starts at the specified index. If no such node exists, a new + * node at the specified index is created. Returns the new node. + * + * idx must start of a mask boundary. + */ +static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx) +{ + struct node *nodep1, *nodep2; + sparsebit_idx_t offset; + sparsebit_num_t orig_num_after; + + assert(!(idx % MASK_BITS)); + + /* + * Is there a node that describes the setting of idx? + * If not, add it. + */ + nodep1 = node_find(s, idx); + if (!nodep1) + return node_add(s, idx); + + /* + * All done if the starting index of the node is where the + * split should occur. + */ + if (nodep1->idx == idx) + return nodep1; + + /* + * Split point not at start of mask, so it must be part of + * bits described by num_after. + */ + + /* + * Calculate offset within num_after for where the split is + * to occur. + */ + offset = idx - (nodep1->idx + MASK_BITS); + orig_num_after = nodep1->num_after; + + /* + * Add a new node to describe the bits starting at + * the split point. + */ + nodep1->num_after = offset; + nodep2 = node_add(s, idx); + + /* Move bits after the split point into the new node */ + nodep2->num_after = orig_num_after - offset; + if (nodep2->num_after >= MASK_BITS) { + nodep2->mask = ~(mask_t) 0; + nodep2->num_after -= MASK_BITS; + } else { + nodep2->mask = (1 << nodep2->num_after) - 1; + nodep2->num_after = 0; + } + + return nodep2; +} + +/* Iteratively reduces the node pointed to by nodep and its adjacent + * nodes into a more compact form. For example, a node with a mask with + * all bits set adjacent to a previous node, will get combined into a + * single node with an increased num_after setting. + * + * After each reduction, a further check is made to see if additional + * reductions are possible with the new previous and next nodes. Note, + * a search for a reduction is only done across the nodes nearest nodep + * and those that became part of a reduction. Reductions beyond nodep + * and the adjacent nodes that are reduced are not discovered. It is the + * responsibility of the caller to pass a nodep that is within one node + * of each possible reduction. + * + * This function does not fix the temporary violation of all invariants. + * For example it does not fix the case where the bit settings described + * by two or more nodes overlap. Such a violation introduces the potential + * complication of a bit setting for a specific index having different settings + * in different nodes. This would then introduce the further complication + * of which node has the correct setting of the bit and thus such conditions + * are not allowed. + * + * This function is designed to fix invariant violations that are introduced + * by node_split() and by changes to the nodes mask or num_after members. + * For example, when setting a bit within a nodes mask, the function that + * sets the bit doesn't have to worry about whether the setting of that + * bit caused the mask to have leading only or trailing only bits set. + * Instead, the function can call node_reduce(), with nodep equal to the + * node address that it set a mask bit in, and node_reduce() will notice + * the cases of leading or trailing only bits and that there is an + * adjacent node that the bit settings could be merged into. + * + * This implementation specifically detects and corrects violation of the + * following invariants: + * + * + Node are only used to represent bits that are set. + * Nodes with a mask of 0 and num_after of 0 are not allowed. + * + * + The setting of at least one bit is always described in a nodes + * mask (mask >= 1). + * + * + A node with all mask bits set only occurs when the last bit + * described by the previous node is not equal to this nodes + * starting index - 1. All such occurences of this condition are + * avoided by moving the setting of the nodes mask bits into + * the previous nodes num_after setting. + */ +static void node_reduce(struct sparsebit *s, struct node *nodep) +{ + bool reduction_performed; + + do { + reduction_performed = false; + struct node *prev, *next, *tmp; + + /* 1) Potential reductions within the current node. */ + + /* Nodes with all bits cleared may be removed. */ + if (nodep->mask == 0 && nodep->num_after == 0) { + /* + * About to remove the node pointed to by + * nodep, which normally would cause a problem + * for the next pass through the reduction loop, + * because the node at the starting point no longer + * exists. This potential problem is handled + * by first remembering the location of the next + * or previous nodes. Doesn't matter which, because + * once the node at nodep is removed, there will be + * no other nodes between prev and next. + * + * Note, the checks performed on nodep against both + * both prev and next both check for an adjacent + * node that can be reduced into a single node. As + * such, after removing the node at nodep, doesn't + * matter whether the nodep for the next pass + * through the loop is equal to the previous pass + * prev or next node. Either way, on the next pass + * the one not selected will become either the + * prev or next node. + */ + tmp = node_next(s, nodep); + if (!tmp) + tmp = node_prev(s, nodep); + + node_rm(s, nodep); + nodep = NULL; + + nodep = tmp; + reduction_performed = true; + continue; + } + + /* + * When the mask is 0, can reduce the amount of num_after + * bits by moving the initial num_after bits into the mask. + */ + if (nodep->mask == 0) { + assert(nodep->num_after != 0); + assert(nodep->idx + MASK_BITS > nodep->idx); + + nodep->idx += MASK_BITS; + + if (nodep->num_after >= MASK_BITS) { + nodep->mask = ~0; + nodep->num_after -= MASK_BITS; + } else { + nodep->mask = (1u << nodep->num_after) - 1; + nodep->num_after = 0; + } + + reduction_performed = true; + continue; + } + + /* + * 2) Potential reductions between the current and + * previous nodes. + */ + prev = node_prev(s, nodep); + if (prev) { + sparsebit_idx_t prev_highest_bit; + + /* Nodes with no bits set can be removed. */ + if (prev->mask == 0 && prev->num_after == 0) { + node_rm(s, prev); + + reduction_performed = true; + continue; + } + + /* + * All mask bits set and previous node has + * adjacent index. + */ + if (nodep->mask + 1 == 0 && + prev->idx + MASK_BITS == nodep->idx) { + prev->num_after += MASK_BITS + nodep->num_after; + nodep->mask = 0; + nodep->num_after = 0; + + reduction_performed = true; + continue; + } + + /* + * Is node adjacent to previous node and the node + * contains a single contiguous range of bits + * starting from the beginning of the mask? + */ + prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after; + if (prev_highest_bit + 1 == nodep->idx && + (nodep->mask | (nodep->mask >> 1)) == nodep->mask) { + /* + * How many contiguous bits are there? + * Is equal to the total number of set + * bits, due to an earlier check that + * there is a single contiguous range of + * set bits. + */ + unsigned int num_contiguous + = __builtin_popcount(nodep->mask); + assert((num_contiguous > 0) && + ((1ULL << num_contiguous) - 1) == nodep->mask); + + prev->num_after += num_contiguous; + nodep->mask = 0; + + /* + * For predictable performance, handle special + * case where all mask bits are set and there + * is a non-zero num_after setting. This code + * is functionally correct without the following + * conditionalized statements, but without them + * the value of num_after is only reduced by + * the number of mask bits per pass. There are + * cases where num_after can be close to 2^64. + * Without this code it could take nearly + * (2^64) / 32 passes to perform the full + * reduction. + */ + if (num_contiguous == MASK_BITS) { + prev->num_after += nodep->num_after; + nodep->num_after = 0; + } + + reduction_performed = true; + continue; + } + } + + /* + * 3) Potential reductions between the current and + * next nodes. + */ + next = node_next(s, nodep); + if (next) { + /* Nodes with no bits set can be removed. */ + if (next->mask == 0 && next->num_after == 0) { + node_rm(s, next); + reduction_performed = true; + continue; + } + + /* + * Is next node index adjacent to current node + * and has a mask with all bits set? + */ + if (next->idx == nodep->idx + MASK_BITS + nodep->num_after && + next->mask == ~(mask_t) 0) { + nodep->num_after += MASK_BITS; + next->mask = 0; + nodep->num_after += next->num_after; + next->num_after = 0; + + node_rm(s, next); + next = NULL; + + reduction_performed = true; + continue; + } + } + } while (nodep && reduction_performed); +} + +/* Returns whether the bit at the index given by idx, within the + * sparsebit array is set or not. + */ +bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx) +{ + struct node *nodep; + + /* Find the node that describes the setting of the bit at idx */ + for (nodep = s->root; nodep; + nodep = nodep->idx > idx ? nodep->left : nodep->right) + if (idx >= nodep->idx && + idx <= nodep->idx + MASK_BITS + nodep->num_after - 1) + goto have_node; + + return false; + +have_node: + /* Bit is set if it is any of the bits described by num_after */ + if (nodep->num_after && idx >= nodep->idx + MASK_BITS) + return true; + + /* Is the corresponding mask bit set */ + assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS); + return !!(nodep->mask & (1 << (idx - nodep->idx))); +} + +/* Within the sparsebit array pointed to by s, sets the bit + * at the index given by idx. + */ +static void bit_set(struct sparsebit *s, sparsebit_idx_t idx) +{ + struct node *nodep; + + /* Skip bits that are already set */ + if (sparsebit_is_set(s, idx)) + return; + + /* + * Get a node where the bit at idx is described by the mask. + * The node_split will also create a node, if there isn't + * already a node that describes the setting of bit. + */ + nodep = node_split(s, idx & -MASK_BITS); + + /* Set the bit within the nodes mask */ + assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1); + assert(!(nodep->mask & (1 << (idx - nodep->idx)))); + nodep->mask |= 1 << (idx - nodep->idx); + s->num_set++; + + node_reduce(s, nodep); +} + +/* Within the sparsebit array pointed to by s, clears the bit + * at the index given by idx. + */ +static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx) +{ + struct node *nodep; + + /* Skip bits that are already cleared */ + if (!sparsebit_is_set(s, idx)) + return; + + /* Is there a node that describes the setting of this bit? */ + nodep = node_find(s, idx); + if (!nodep) + return; + + /* + * If a num_after bit, split the node, so that the bit is + * part of a node mask. + */ + if (idx >= nodep->idx + MASK_BITS) + nodep = node_split(s, idx & -MASK_BITS); + + /* + * After node_split above, bit at idx should be within the mask. + * Clear that bit. + */ + assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1); + assert(nodep->mask & (1 << (idx - nodep->idx))); + nodep->mask &= ~(1 << (idx - nodep->idx)); + assert(s->num_set > 0 || sparsebit_all_set(s)); + s->num_set--; + + node_reduce(s, nodep); +} + +/* Recursively dumps to the FILE stream given by stream the contents + * of the sub-tree of nodes pointed to by nodep. Each line of output + * is prefixed by the number of spaces given by indent. On each + * recursion, the indent amount is increased by 2. This causes nodes + * at each level deeper into the binary search tree to be displayed + * with a greater indent. + */ +static void dump_nodes(FILE *stream, struct node *nodep, + unsigned int indent) +{ + char *node_type; + + /* Dump contents of node */ + if (!nodep->parent) + node_type = "root"; + else if (nodep == nodep->parent->left) + node_type = "left"; + else { + assert(nodep == nodep->parent->right); + node_type = "right"; + } + fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep); + fprintf(stream, "%*s parent: %p left: %p right: %p\n", indent, "", + nodep->parent, nodep->left, nodep->right); + fprintf(stream, "%*s idx: 0x%lx mask: 0x%x num_after: 0x%lx\n", + indent, "", nodep->idx, nodep->mask, nodep->num_after); + + /* If present, dump contents of left child nodes */ + if (nodep->left) + dump_nodes(stream, nodep->left, indent + 2); + + /* If present, dump contents of right child nodes */ + if (nodep->right) + dump_nodes(stream, nodep->right, indent + 2); +} + +static inline sparsebit_idx_t node_first_set(struct node *nodep, int start) +{ + mask_t leading = (mask_t)1 << start; + int n1 = __builtin_ctz(nodep->mask & -leading); + + return nodep->idx + n1; +} + +static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start) +{ + mask_t leading = (mask_t)1 << start; + int n1 = __builtin_ctz(~nodep->mask & -leading); + + return nodep->idx + n1; +} + +/* Dumps to the FILE stream specified by stream, the implementation dependent + * internal state of s. Each line of output is prefixed with the number + * of spaces given by indent. The output is completely implementation + * dependent and subject to change. Output from this function should only + * be used for diagnostic purposes. For example, this function can be + * used by test cases after they detect an unexpected condition, as a means + * to capture diagnostic information. + */ +static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s, + unsigned int indent) +{ + /* Dump the contents of s */ + fprintf(stream, "%*sroot: %p\n", indent, "", s->root); + fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set); + + if (s->root) + dump_nodes(stream, s->root, indent); +} + +/* Allocates and returns a new sparsebit array. The initial state + * of the newly allocated sparsebit array has all bits cleared. + */ +struct sparsebit *sparsebit_alloc(void) +{ + struct sparsebit *s; + + /* Allocate top level structure. */ + s = calloc(1, sizeof(*s)); + if (!s) { + perror("calloc"); + abort(); + } + + return s; +} + +/* Frees the implementation dependent data for the sparsebit array + * pointed to by s and poisons the pointer to that data. + */ +void sparsebit_free(struct sparsebit **sbitp) +{ + struct sparsebit *s = *sbitp; + + if (!s) + return; + + sparsebit_clear_all(s); + free(s); + *sbitp = NULL; +} + +/* Makes a copy of the sparsebit array given by s, to the sparsebit + * array given by d. Note, d must have already been allocated via + * sparsebit_alloc(). It can though already have bits set, which + * if different from src will be cleared. + */ +void sparsebit_copy(struct sparsebit *d, struct sparsebit *s) +{ + /* First clear any bits already set in the destination */ + sparsebit_clear_all(d); + + if (s->root) { + d->root = node_copy_subtree(s->root); + d->num_set = s->num_set; + } +} + +/* Returns whether num consecutive bits starting at idx are all set. */ +bool sparsebit_is_set_num(struct sparsebit *s, + sparsebit_idx_t idx, sparsebit_num_t num) +{ + sparsebit_idx_t next_cleared; + + assert(num > 0); + assert(idx + num - 1 >= idx); + + /* With num > 0, the first bit must be set. */ + if (!sparsebit_is_set(s, idx)) + return false; + + /* Find the next cleared bit */ + next_cleared = sparsebit_next_clear(s, idx); + + /* + * If no cleared bits beyond idx, then there are at least num + * set bits. idx + num doesn't wrap. Otherwise check if + * there are enough set bits between idx and the next cleared bit. + */ + return next_cleared == 0 || next_cleared - idx >= num; +} + +/* Returns whether the bit at the index given by idx. */ +bool sparsebit_is_clear(struct sparsebit *s, + sparsebit_idx_t idx) +{ + return !sparsebit_is_set(s, idx); +} + +/* Returns whether num consecutive bits starting at idx are all cleared. */ +bool sparsebit_is_clear_num(struct sparsebit *s, + sparsebit_idx_t idx, sparsebit_num_t num) +{ + sparsebit_idx_t next_set; + + assert(num > 0); + assert(idx + num - 1 >= idx); + + /* With num > 0, the first bit must be cleared. */ + if (!sparsebit_is_clear(s, idx)) + return false; + + /* Find the next set bit */ + next_set = sparsebit_next_set(s, idx); + + /* + * If no set bits beyond idx, then there are at least num + * cleared bits. idx + num doesn't wrap. Otherwise check if + * there are enough cleared bits between idx and the next set bit. + */ + return next_set == 0 || next_set - idx >= num; +} + +/* Returns the total number of bits set. Note: 0 is also returned for + * the case of all bits set. This is because with all bits set, there + * is 1 additional bit set beyond what can be represented in the return + * value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0, + * to determine if the sparsebit array has any bits set. + */ +sparsebit_num_t sparsebit_num_set(struct sparsebit *s) +{ + return s->num_set; +} + +/* Returns whether any bit is set in the sparsebit array. */ +bool sparsebit_any_set(struct sparsebit *s) +{ + /* + * Nodes only describe set bits. If any nodes then there + * is at least 1 bit set. + */ + if (!s->root) + return false; + + /* + * Every node should have a non-zero mask. For now will + * just assure that the root node has a non-zero mask, + * which is a quick check that at least 1 bit is set. + */ + assert(s->root->mask != 0); + assert(s->num_set > 0 || + (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS && + s->root->mask == ~(mask_t) 0)); + + return true; +} + +/* Returns whether all the bits in the sparsebit array are cleared. */ +bool sparsebit_all_clear(struct sparsebit *s) +{ + return !sparsebit_any_set(s); +} + +/* Returns whether all the bits in the sparsebit array are set. */ +bool sparsebit_any_clear(struct sparsebit *s) +{ + return !sparsebit_all_set(s); +} + +/* Returns the index of the first set bit. Abort if no bits are set. + */ +sparsebit_idx_t sparsebit_first_set(struct sparsebit *s) +{ + struct node *nodep; + + /* Validate at least 1 bit is set */ + assert(sparsebit_any_set(s)); + + nodep = node_first(s); + return node_first_set(nodep, 0); +} + +/* Returns the index of the first cleared bit. Abort if + * no bits are cleared. + */ +sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s) +{ + struct node *nodep1, *nodep2; + + /* Validate at least 1 bit is cleared. */ + assert(sparsebit_any_clear(s)); + + /* If no nodes or first node index > 0 then lowest cleared is 0 */ + nodep1 = node_first(s); + if (!nodep1 || nodep1->idx > 0) + return 0; + + /* Does the mask in the first node contain any cleared bits. */ + if (nodep1->mask != ~(mask_t) 0) + return node_first_clear(nodep1, 0); + + /* + * All mask bits set in first node. If there isn't a second node + * then the first cleared bit is the first bit after the bits + * described by the first node. + */ + nodep2 = node_next(s, nodep1); + if (!nodep2) { + /* + * No second node. First cleared bit is first bit beyond + * bits described by first node. + */ + assert(nodep1->mask == ~(mask_t) 0); + assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0); + return nodep1->idx + MASK_BITS + nodep1->num_after; + } + + /* + * There is a second node. + * If it is not adjacent to the first node, then there is a gap + * of cleared bits between the nodes, and the first cleared bit + * is the first bit within the gap. + */ + if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx) + return nodep1->idx + MASK_BITS + nodep1->num_after; + + /* + * Second node is adjacent to the first node. + * Because it is adjacent, its mask should be non-zero. If all + * its mask bits are set, then with it being adjacent, it should + * have had the mask bits moved into the num_after setting of the + * previous node. + */ + return node_first_clear(nodep2, 0); +} + +/* Returns index of next bit set within s after the index given by prev. + * Returns 0 if there are no bits after prev that are set. + */ +sparsebit_idx_t sparsebit_next_set(struct sparsebit *s, + sparsebit_idx_t prev) +{ + sparsebit_idx_t lowest_possible = prev + 1; + sparsebit_idx_t start; + struct node *nodep; + + /* A bit after the highest index can't be set. */ + if (lowest_possible == 0) + return 0; + + /* + * Find the leftmost 'candidate' overlapping or to the right + * of lowest_possible. + */ + struct node *candidate = NULL; + + /* True iff lowest_possible is within candidate */ + bool contains = false; + + /* + * Find node that describes setting of bit at lowest_possible. + * If such a node doesn't exist, find the node with the lowest + * starting index that is > lowest_possible. + */ + for (nodep = s->root; nodep;) { + if ((nodep->idx + MASK_BITS + nodep->num_after - 1) + >= lowest_possible) { + candidate = nodep; + if (candidate->idx <= lowest_possible) { + contains = true; + break; + } + nodep = nodep->left; + } else { + nodep = nodep->right; + } + } + if (!candidate) + return 0; + + assert(candidate->mask != 0); + + /* Does the candidate node describe the setting of lowest_possible? */ + if (!contains) { + /* + * Candidate doesn't describe setting of bit at lowest_possible. + * Candidate points to the first node with a starting index + * > lowest_possible. + */ + assert(candidate->idx > lowest_possible); + + return node_first_set(candidate, 0); + } + + /* + * Candidate describes setting of bit at lowest_possible. + * Note: although the node describes the setting of the bit + * at lowest_possible, its possible that its setting and the + * setting of all latter bits described by this node are 0. + * For now, just handle the cases where this node describes + * a bit at or after an index of lowest_possible that is set. + */ + start = lowest_possible - candidate->idx; + + if (start < MASK_BITS && candidate->mask >= (1 << start)) + return node_first_set(candidate, start); + + if (candidate->num_after) { + sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS; + + return lowest_possible < first_num_after_idx + ? first_num_after_idx : lowest_possible; + } + + /* + * Although candidate node describes setting of bit at + * the index of lowest_possible, all bits at that index and + * latter that are described by candidate are cleared. With + * this, the next bit is the first bit in the next node, if + * such a node exists. If a next node doesn't exist, then + * there is no next set bit. + */ + candidate = node_next(s, candidate); + if (!candidate) + return 0; + + return node_first_set(candidate, 0); +} + +/* Returns index of next bit cleared within s after the index given by prev. + * Returns 0 if there are no bits after prev that are cleared. + */ +sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s, + sparsebit_idx_t prev) +{ + sparsebit_idx_t lowest_possible = prev + 1; + sparsebit_idx_t idx; + struct node *nodep1, *nodep2; + + /* A bit after the highest index can't be set. */ + if (lowest_possible == 0) + return 0; + + /* + * Does a node describing the setting of lowest_possible exist? + * If not, the bit at lowest_possible is cleared. + */ + nodep1 = node_find(s, lowest_possible); + if (!nodep1) + return lowest_possible; + + /* Does a mask bit in node 1 describe the next cleared bit. */ + for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++) + if (!(nodep1->mask & (1 << idx))) + return nodep1->idx + idx; + + /* + * Next cleared bit is not described by node 1. If there + * isn't a next node, then next cleared bit is described + * by bit after the bits described by the first node. + */ + nodep2 = node_next(s, nodep1); + if (!nodep2) + return nodep1->idx + MASK_BITS + nodep1->num_after; + + /* + * There is a second node. + * If it is not adjacent to the first node, then there is a gap + * of cleared bits between the nodes, and the next cleared bit + * is the first bit within the gap. + */ + if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx) + return nodep1->idx + MASK_BITS + nodep1->num_after; + + /* + * Second node is adjacent to the first node. + * Because it is adjacent, its mask should be non-zero. If all + * its mask bits are set, then with it being adjacent, it should + * have had the mask bits moved into the num_after setting of the + * previous node. + */ + return node_first_clear(nodep2, 0); +} + +/* Starting with the index 1 greater than the index given by start, finds + * and returns the index of the first sequence of num consecutively set + * bits. Returns a value of 0 of no such sequence exists. + */ +sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s, + sparsebit_idx_t start, sparsebit_num_t num) +{ + sparsebit_idx_t idx; + + assert(num >= 1); + + for (idx = sparsebit_next_set(s, start); + idx != 0 && idx + num - 1 >= idx; + idx = sparsebit_next_set(s, idx)) { + assert(sparsebit_is_set(s, idx)); + + /* + * Does the sequence of bits starting at idx consist of + * num set bits? + */ + if (sparsebit_is_set_num(s, idx, num)) + return idx; + + /* + * Sequence of set bits at idx isn't large enough. + * Skip this entire sequence of set bits. + */ + idx = sparsebit_next_clear(s, idx); + if (idx == 0) + return 0; + } + + return 0; +} + +/* Starting with the index 1 greater than the index given by start, finds + * and returns the index of the first sequence of num consecutively cleared + * bits. Returns a value of 0 of no such sequence exists. + */ +sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s, + sparsebit_idx_t start, sparsebit_num_t num) +{ + sparsebit_idx_t idx; + + assert(num >= 1); + + for (idx = sparsebit_next_clear(s, start); + idx != 0 && idx + num - 1 >= idx; + idx = sparsebit_next_clear(s, idx)) { + assert(sparsebit_is_clear(s, idx)); + + /* + * Does the sequence of bits starting at idx consist of + * num cleared bits? + */ + if (sparsebit_is_clear_num(s, idx, num)) + return idx; + + /* + * Sequence of cleared bits at idx isn't large enough. + * Skip this entire sequence of cleared bits. + */ + idx = sparsebit_next_set(s, idx); + if (idx == 0) + return 0; + } + + return 0; +} + +/* Sets the bits * in the inclusive range idx through idx + num - 1. */ +void sparsebit_set_num(struct sparsebit *s, + sparsebit_idx_t start, sparsebit_num_t num) +{ + struct node *nodep, *next; + unsigned int n1; + sparsebit_idx_t idx; + sparsebit_num_t n; + sparsebit_idx_t middle_start, middle_end; + + assert(num > 0); + assert(start + num - 1 >= start); + + /* + * Leading - bits before first mask boundary. + * + * TODO(lhuemill): With some effort it may be possible to + * replace the following loop with a sequential sequence + * of statements. High level sequence would be: + * + * 1. Use node_split() to force node that describes setting + * of idx to be within the mask portion of a node. + * 2. Form mask of bits to be set. + * 3. Determine number of mask bits already set in the node + * and store in a local variable named num_already_set. + * 4. Set the appropriate mask bits within the node. + * 5. Increment struct sparsebit_pvt num_set member + * by the number of bits that were actually set. + * Exclude from the counts bits that were already set. + * 6. Before returning to the caller, use node_reduce() to + * handle the multiple corner cases that this method + * introduces. + */ + for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--) + bit_set(s, idx); + + /* Middle - bits spanning one or more entire mask */ + middle_start = idx; + middle_end = middle_start + (n & -MASK_BITS) - 1; + if (n >= MASK_BITS) { + nodep = node_split(s, middle_start); + + /* + * As needed, split just after end of middle bits. + * No split needed if end of middle bits is at highest + * supported bit index. + */ + if (middle_end + 1 > middle_end) + (void) node_split(s, middle_end + 1); + + /* Delete nodes that only describe bits within the middle. */ + for (next = node_next(s, nodep); + next && (next->idx < middle_end); + next = node_next(s, nodep)) { + assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end); + node_rm(s, next); + next = NULL; + } + + /* As needed set each of the mask bits */ + for (n1 = 0; n1 < MASK_BITS; n1++) { + if (!(nodep->mask & (1 << n1))) { + nodep->mask |= 1 << n1; + s->num_set++; + } + } + + s->num_set -= nodep->num_after; + nodep->num_after = middle_end - middle_start + 1 - MASK_BITS; + s->num_set += nodep->num_after; + + node_reduce(s, nodep); + } + idx = middle_end + 1; + n -= middle_end - middle_start + 1; + + /* Trailing - bits at and beyond last mask boundary */ + assert(n < MASK_BITS); + for (; n > 0; idx++, n--) + bit_set(s, idx); +} + +/* Clears the bits * in the inclusive range idx through idx + num - 1. */ +void sparsebit_clear_num(struct sparsebit *s, + sparsebit_idx_t start, sparsebit_num_t num) +{ + struct node *nodep, *next; + unsigned int n1; + sparsebit_idx_t idx; + sparsebit_num_t n; + sparsebit_idx_t middle_start, middle_end; + + assert(num > 0); + assert(start + num - 1 >= start); + + /* Leading - bits before first mask boundary */ + for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--) + bit_clear(s, idx); + + /* Middle - bits spanning one or more entire mask */ + middle_start = idx; + middle_end = middle_start + (n & -MASK_BITS) - 1; + if (n >= MASK_BITS) { + nodep = node_split(s, middle_start); + + /* + * As needed, split just after end of middle bits. + * No split needed if end of middle bits is at highest + * supported bit index. + */ + if (middle_end + 1 > middle_end) + (void) node_split(s, middle_end + 1); + + /* Delete nodes that only describe bits within the middle. */ + for (next = node_next(s, nodep); + next && (next->idx < middle_end); + next = node_next(s, nodep)) { + assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end); + node_rm(s, next); + next = NULL; + } + + /* As needed clear each of the mask bits */ + for (n1 = 0; n1 < MASK_BITS; n1++) { + if (nodep->mask & (1 << n1)) { + nodep->mask &= ~(1 << n1); + s->num_set--; + } + } + + /* Clear any bits described by num_after */ + s->num_set -= nodep->num_after; + nodep->num_after = 0; + + /* + * Delete the node that describes the beginning of + * the middle bits and perform any allowed reductions + * with the nodes prev or next of nodep. + */ + node_reduce(s, nodep); + nodep = NULL; + } + idx = middle_end + 1; + n -= middle_end - middle_start + 1; + + /* Trailing - bits at and beyond last mask boundary */ + assert(n < MASK_BITS); + for (; n > 0; idx++, n--) + bit_clear(s, idx); +} + +/* Sets the bit at the index given by idx. */ +void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx) +{ + sparsebit_set_num(s, idx, 1); +} + +/* Clears the bit at the index given by idx. */ +void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx) +{ + sparsebit_clear_num(s, idx, 1); +} + +/* Sets the bits in the entire addressable range of the sparsebit array. */ +void sparsebit_set_all(struct sparsebit *s) +{ + sparsebit_set(s, 0); + sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0); + assert(sparsebit_all_set(s)); +} + +/* Clears the bits in the entire addressable range of the sparsebit array. */ +void sparsebit_clear_all(struct sparsebit *s) +{ + sparsebit_clear(s, 0); + sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0); + assert(!sparsebit_any_set(s)); +} + +static size_t display_range(FILE *stream, sparsebit_idx_t low, + sparsebit_idx_t high, bool prepend_comma_space) +{ + char *fmt_str; + size_t sz; + + /* Determine the printf format string */ + if (low == high) + fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx"; + else + fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx"; + + /* + * When stream is NULL, just determine the size of what would + * have been printed, else print the range. + */ + if (!stream) + sz = snprintf(NULL, 0, fmt_str, low, high); + else + sz = fprintf(stream, fmt_str, low, high); + + return sz; +} + + +/* Dumps to the FILE stream given by stream, the bit settings + * of s. Each line of output is prefixed with the number of + * spaces given by indent. The length of each line is implementation + * dependent and does not depend on the indent amount. The following + * is an example output of a sparsebit array that has bits: + * + * 0x5, 0x8, 0xa:0xe, 0x12 + * + * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18 + * are set. Note that a ':', instead of a '-' is used to specify a range of + * contiguous bits. This is done because '-' is used to specify command-line + * options, and sometimes ranges are specified as command-line arguments. + */ +void sparsebit_dump(FILE *stream, struct sparsebit *s, + unsigned int indent) +{ + size_t current_line_len = 0; + size_t sz; + struct node *nodep; + + if (!sparsebit_any_set(s)) + return; + + /* Display initial indent */ + fprintf(stream, "%*s", indent, ""); + + /* For each node */ + for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) { + unsigned int n1; + sparsebit_idx_t low, high; + + /* For each group of bits in the mask */ + for (n1 = 0; n1 < MASK_BITS; n1++) { + if (nodep->mask & (1 << n1)) { + low = high = nodep->idx + n1; + + for (; n1 < MASK_BITS; n1++) { + if (nodep->mask & (1 << n1)) + high = nodep->idx + n1; + else + break; + } + + if ((n1 == MASK_BITS) && nodep->num_after) + high += nodep->num_after; + + /* + * How much room will it take to display + * this range. + */ + sz = display_range(NULL, low, high, + current_line_len != 0); + + /* + * If there is not enough room, display + * a newline plus the indent of the next + * line. + */ + if (current_line_len + sz > DUMP_LINE_MAX) { + fputs("\n", stream); + fprintf(stream, "%*s", indent, ""); + current_line_len = 0; + } + + /* Display the range */ + sz = display_range(stream, low, high, + current_line_len != 0); + current_line_len += sz; + } + } + + /* + * If num_after and most significant-bit of mask is not + * set, then still need to display a range for the bits + * described by num_after. + */ + if (!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) { + low = nodep->idx + MASK_BITS; + high = nodep->idx + MASK_BITS + nodep->num_after - 1; + + /* + * How much room will it take to display + * this range. + */ + sz = display_range(NULL, low, high, + current_line_len != 0); + + /* + * If there is not enough room, display + * a newline plus the indent of the next + * line. + */ + if (current_line_len + sz > DUMP_LINE_MAX) { + fputs("\n", stream); + fprintf(stream, "%*s", indent, ""); + current_line_len = 0; + } + + /* Display the range */ + sz = display_range(stream, low, high, + current_line_len != 0); + current_line_len += sz; + } + } + fputs("\n", stream); +} + +/* Validates the internal state of the sparsebit array given by + * s. On error, diagnostic information is printed to stderr and + * abort is called. + */ +void sparsebit_validate_internal(struct sparsebit *s) +{ + bool error_detected = false; + struct node *nodep, *prev = NULL; + sparsebit_num_t total_bits_set = 0; + unsigned int n1; + + /* For each node */ + for (nodep = node_first(s); nodep; + prev = nodep, nodep = node_next(s, nodep)) { + + /* + * Increase total bits set by the number of bits set + * in this node. + */ + for (n1 = 0; n1 < MASK_BITS; n1++) + if (nodep->mask & (1 << n1)) + total_bits_set++; + + total_bits_set += nodep->num_after; + + /* + * Arbitrary choice as to whether a mask of 0 is allowed + * or not. For diagnostic purposes it is beneficial to + * have only one valid means to represent a set of bits. + * To support this an arbitrary choice has been made + * to not allow a mask of zero. + */ + if (nodep->mask == 0) { + fprintf(stderr, "Node mask of zero, " + "nodep: %p nodep->mask: 0x%x", + nodep, nodep->mask); + error_detected = true; + break; + } + + /* + * Validate num_after is not greater than the max index + * - the number of mask bits. The num_after member + * uses 0-based indexing and thus has no value that + * represents all bits set. This limitation is handled + * by requiring a non-zero mask. With a non-zero mask, + * MASK_BITS worth of bits are described by the mask, + * which makes the largest needed num_after equal to: + * + * (~(sparsebit_num_t) 0) - MASK_BITS + 1 + */ + if (nodep->num_after + > (~(sparsebit_num_t) 0) - MASK_BITS + 1) { + fprintf(stderr, "num_after too large, " + "nodep: %p nodep->num_after: 0x%lx", + nodep, nodep->num_after); + error_detected = true; + break; + } + + /* Validate node index is divisible by the mask size */ + if (nodep->idx % MASK_BITS) { + fprintf(stderr, "Node index not divisable by " + "mask size,\n" + " nodep: %p nodep->idx: 0x%lx " + "MASK_BITS: %lu\n", + nodep, nodep->idx, MASK_BITS); + error_detected = true; + break; + } + + /* + * Validate bits described by node don't wrap beyond the + * highest supported index. + */ + if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) { + fprintf(stderr, "Bits described by node wrap " + "beyond highest supported index,\n" + " nodep: %p nodep->idx: 0x%lx\n" + " MASK_BITS: %lu nodep->num_after: 0x%lx", + nodep, nodep->idx, MASK_BITS, nodep->num_after); + error_detected = true; + break; + } + + /* Check parent pointers. */ + if (nodep->left) { + if (nodep->left->parent != nodep) { + fprintf(stderr, "Left child parent pointer " + "doesn't point to this node,\n" + " nodep: %p nodep->left: %p " + "nodep->left->parent: %p", + nodep, nodep->left, + nodep->left->parent); + error_detected = true; + break; + } + } + + if (nodep->right) { + if (nodep->right->parent != nodep) { + fprintf(stderr, "Right child parent pointer " + "doesn't point to this node,\n" + " nodep: %p nodep->right: %p " + "nodep->right->parent: %p", + nodep, nodep->right, + nodep->right->parent); + error_detected = true; + break; + } + } + + if (!nodep->parent) { + if (s->root != nodep) { + fprintf(stderr, "Unexpected root node, " + "s->root: %p nodep: %p", + s->root, nodep); + error_detected = true; + break; + } + } + + if (prev) { + /* + * Is index of previous node before index of + * current node? + */ + if (prev->idx >= nodep->idx) { + fprintf(stderr, "Previous node index " + ">= current node index,\n" + " prev: %p prev->idx: 0x%lx\n" + " nodep: %p nodep->idx: 0x%lx", + prev, prev->idx, nodep, nodep->idx); + error_detected = true; + break; + } + + /* + * Nodes occur in asscending order, based on each + * nodes starting index. + */ + if ((prev->idx + MASK_BITS + prev->num_after - 1) + >= nodep->idx) { + fprintf(stderr, "Previous node bit range " + "overlap with current node bit range,\n" + " prev: %p prev->idx: 0x%lx " + "prev->num_after: 0x%lx\n" + " nodep: %p nodep->idx: 0x%lx " + "nodep->num_after: 0x%lx\n" + " MASK_BITS: %lu", + prev, prev->idx, prev->num_after, + nodep, nodep->idx, nodep->num_after, + MASK_BITS); + error_detected = true; + break; + } + + /* + * When the node has all mask bits set, it shouldn't + * be adjacent to the last bit described by the + * previous node. + */ + if (nodep->mask == ~(mask_t) 0 && + prev->idx + MASK_BITS + prev->num_after == nodep->idx) { + fprintf(stderr, "Current node has mask with " + "all bits set and is adjacent to the " + "previous node,\n" + " prev: %p prev->idx: 0x%lx " + "prev->num_after: 0x%lx\n" + " nodep: %p nodep->idx: 0x%lx " + "nodep->num_after: 0x%lx\n" + " MASK_BITS: %lu", + prev, prev->idx, prev->num_after, + nodep, nodep->idx, nodep->num_after, + MASK_BITS); + + error_detected = true; + break; + } + } + } + + if (!error_detected) { + /* + * Is sum of bits set in each node equal to the count + * of total bits set. + */ + if (s->num_set != total_bits_set) { + fprintf(stderr, "Number of bits set missmatch,\n" + " s->num_set: 0x%lx total_bits_set: 0x%lx", + s->num_set, total_bits_set); + + error_detected = true; + } + } + + if (error_detected) { + fputs(" dump_internal:\n", stderr); + sparsebit_dump_internal(stderr, s, 4); + abort(); + } +} + + +#ifdef FUZZ +/* A simple but effective fuzzing driver. Look for bugs with the help + * of some invariants and of a trivial representation of sparsebit. + * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let + * afl-fuzz do the magic. :) + */ + +#include <stdlib.h> +#include <assert.h> + +struct range { + sparsebit_idx_t first, last; + bool set; +}; + +struct sparsebit *s; +struct range ranges[1000]; +int num_ranges; + +static bool get_value(sparsebit_idx_t idx) +{ + int i; + + for (i = num_ranges; --i >= 0; ) + if (ranges[i].first <= idx && idx <= ranges[i].last) + return ranges[i].set; + + return false; +} + +static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last) +{ + sparsebit_num_t num; + sparsebit_idx_t next; + + if (first < last) { + num = last - first + 1; + } else { + num = first - last + 1; + first = last; + last = first + num - 1; + } + + switch (code) { + case 0: + sparsebit_set(s, first); + assert(sparsebit_is_set(s, first)); + assert(!sparsebit_is_clear(s, first)); + assert(sparsebit_any_set(s)); + assert(!sparsebit_all_clear(s)); + if (get_value(first)) + return; + if (num_ranges == 1000) + exit(0); + ranges[num_ranges++] = (struct range) + { .first = first, .last = first, .set = true }; + break; + case 1: + sparsebit_clear(s, first); + assert(!sparsebit_is_set(s, first)); + assert(sparsebit_is_clear(s, first)); + assert(sparsebit_any_clear(s)); + assert(!sparsebit_all_set(s)); + if (!get_value(first)) + return; + if (num_ranges == 1000) + exit(0); + ranges[num_ranges++] = (struct range) + { .first = first, .last = first, .set = false }; + break; + case 2: + assert(sparsebit_is_set(s, first) == get_value(first)); + assert(sparsebit_is_clear(s, first) == !get_value(first)); + break; + case 3: + if (sparsebit_any_set(s)) + assert(get_value(sparsebit_first_set(s))); + if (sparsebit_any_clear(s)) + assert(!get_value(sparsebit_first_clear(s))); + sparsebit_set_all(s); + assert(!sparsebit_any_clear(s)); + assert(sparsebit_all_set(s)); + num_ranges = 0; + ranges[num_ranges++] = (struct range) + { .first = 0, .last = ~(sparsebit_idx_t)0, .set = true }; + break; + case 4: + if (sparsebit_any_set(s)) + assert(get_value(sparsebit_first_set(s))); + if (sparsebit_any_clear(s)) + assert(!get_value(sparsebit_first_clear(s))); + sparsebit_clear_all(s); + assert(!sparsebit_any_set(s)); + assert(sparsebit_all_clear(s)); + num_ranges = 0; + break; + case 5: + next = sparsebit_next_set(s, first); + assert(next == 0 || next > first); + assert(next == 0 || get_value(next)); + break; + case 6: + next = sparsebit_next_clear(s, first); + assert(next == 0 || next > first); + assert(next == 0 || !get_value(next)); + break; + case 7: + next = sparsebit_next_clear(s, first); + if (sparsebit_is_set_num(s, first, num)) { + assert(next == 0 || next > last); + if (first) + next = sparsebit_next_set(s, first - 1); + else if (sparsebit_any_set(s)) + next = sparsebit_first_set(s); + else + return; + assert(next == first); + } else { + assert(sparsebit_is_clear(s, first) || next <= last); + } + break; + case 8: + next = sparsebit_next_set(s, first); + if (sparsebit_is_clear_num(s, first, num)) { + assert(next == 0 || next > last); + if (first) + next = sparsebit_next_clear(s, first - 1); + else if (sparsebit_any_clear(s)) + next = sparsebit_first_clear(s); + else + return; + assert(next == first); + } else { + assert(sparsebit_is_set(s, first) || next <= last); + } + break; + case 9: + sparsebit_set_num(s, first, num); + assert(sparsebit_is_set_num(s, first, num)); + assert(!sparsebit_is_clear_num(s, first, num)); + assert(sparsebit_any_set(s)); + assert(!sparsebit_all_clear(s)); + if (num_ranges == 1000) + exit(0); + ranges[num_ranges++] = (struct range) + { .first = first, .last = last, .set = true }; + break; + case 10: + sparsebit_clear_num(s, first, num); + assert(!sparsebit_is_set_num(s, first, num)); + assert(sparsebit_is_clear_num(s, first, num)); + assert(sparsebit_any_clear(s)); + assert(!sparsebit_all_set(s)); + if (num_ranges == 1000) + exit(0); + ranges[num_ranges++] = (struct range) + { .first = first, .last = last, .set = false }; + break; + case 11: + sparsebit_validate_internal(s); + break; + default: + break; + } +} + +unsigned char get8(void) +{ + int ch; + + ch = getchar(); + if (ch == EOF) + exit(0); + return ch; +} + +uint64_t get64(void) +{ + uint64_t x; + + x = get8(); + x = (x << 8) | get8(); + x = (x << 8) | get8(); + x = (x << 8) | get8(); + x = (x << 8) | get8(); + x = (x << 8) | get8(); + x = (x << 8) | get8(); + return (x << 8) | get8(); +} + +int main(void) +{ + s = sparsebit_alloc(); + for (;;) { + uint8_t op = get8() & 0xf; + uint64_t first = get64(); + uint64_t last = get64(); + + operate(op, first, last); + } +} +#endif diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86.c new file mode 100644 index 000000000000..2f17675f4275 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86.c @@ -0,0 +1,700 @@ +/* + * tools/testing/selftests/kvm/lib/x86.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include "test_util.h" +#include "kvm_util.h" +#include "kvm_util_internal.h" +#include "x86.h" + +/* Minimum physical address used for virtual translation tables. */ +#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 + +/* Virtual translation table structure declarations */ +struct pageMapL4Entry { + uint64_t present:1; + uint64_t writable:1; + uint64_t user:1; + uint64_t write_through:1; + uint64_t cache_disable:1; + uint64_t accessed:1; + uint64_t ignored_06:1; + uint64_t page_size:1; + uint64_t ignored_11_08:4; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t execute_disable:1; +}; + +struct pageDirectoryPointerEntry { + uint64_t present:1; + uint64_t writable:1; + uint64_t user:1; + uint64_t write_through:1; + uint64_t cache_disable:1; + uint64_t accessed:1; + uint64_t ignored_06:1; + uint64_t page_size:1; + uint64_t ignored_11_08:4; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t execute_disable:1; +}; + +struct pageDirectoryEntry { + uint64_t present:1; + uint64_t writable:1; + uint64_t user:1; + uint64_t write_through:1; + uint64_t cache_disable:1; + uint64_t accessed:1; + uint64_t ignored_06:1; + uint64_t page_size:1; + uint64_t ignored_11_08:4; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t execute_disable:1; +}; + +struct pageTableEntry { + uint64_t present:1; + uint64_t writable:1; + uint64_t user:1; + uint64_t write_through:1; + uint64_t cache_disable:1; + uint64_t accessed:1; + uint64_t dirty:1; + uint64_t reserved_07:1; + uint64_t global:1; + uint64_t ignored_11_09:3; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t execute_disable:1; +}; + +/* Register Dump + * + * Input Args: + * indent - Left margin indent amount + * regs - register + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the state of the registers given by regs, to the FILE stream + * given by steam. + */ +void regs_dump(FILE *stream, struct kvm_regs *regs, + uint8_t indent) +{ + fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " + "rcx: 0x%.16llx rdx: 0x%.16llx\n", + indent, "", + regs->rax, regs->rbx, regs->rcx, regs->rdx); + fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx " + "rsp: 0x%.16llx rbp: 0x%.16llx\n", + indent, "", + regs->rsi, regs->rdi, regs->rsp, regs->rbp); + fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx " + "r10: 0x%.16llx r11: 0x%.16llx\n", + indent, "", + regs->r8, regs->r9, regs->r10, regs->r11); + fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx " + "r14: 0x%.16llx r15: 0x%.16llx\n", + indent, "", + regs->r12, regs->r13, regs->r14, regs->r15); + fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n", + indent, "", + regs->rip, regs->rflags); +} + +/* Segment Dump + * + * Input Args: + * indent - Left margin indent amount + * segment - KVM segment + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the state of the KVM segment given by segment, to the FILE stream + * given by steam. + */ +static void segment_dump(FILE *stream, struct kvm_segment *segment, + uint8_t indent) +{ + fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x " + "selector: 0x%.4x type: 0x%.2x\n", + indent, "", segment->base, segment->limit, + segment->selector, segment->type); + fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x " + "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n", + indent, "", segment->present, segment->dpl, + segment->db, segment->s, segment->l); + fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x " + "unusable: 0x%.2x padding: 0x%.2x\n", + indent, "", segment->g, segment->avl, + segment->unusable, segment->padding); +} + +/* dtable Dump + * + * Input Args: + * indent - Left margin indent amount + * dtable - KVM dtable + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the state of the KVM dtable given by dtable, to the FILE stream + * given by steam. + */ +static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, + uint8_t indent) +{ + fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x " + "padding: 0x%.4x 0x%.4x 0x%.4x\n", + indent, "", dtable->base, dtable->limit, + dtable->padding[0], dtable->padding[1], dtable->padding[2]); +} + +/* System Register Dump + * + * Input Args: + * indent - Left margin indent amount + * sregs - System registers + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the state of the system registers given by sregs, to the FILE stream + * given by steam. + */ +void sregs_dump(FILE *stream, struct kvm_sregs *sregs, + uint8_t indent) +{ + unsigned int i; + + fprintf(stream, "%*scs:\n", indent, ""); + segment_dump(stream, &sregs->cs, indent + 2); + fprintf(stream, "%*sds:\n", indent, ""); + segment_dump(stream, &sregs->ds, indent + 2); + fprintf(stream, "%*ses:\n", indent, ""); + segment_dump(stream, &sregs->es, indent + 2); + fprintf(stream, "%*sfs:\n", indent, ""); + segment_dump(stream, &sregs->fs, indent + 2); + fprintf(stream, "%*sgs:\n", indent, ""); + segment_dump(stream, &sregs->gs, indent + 2); + fprintf(stream, "%*sss:\n", indent, ""); + segment_dump(stream, &sregs->ss, indent + 2); + fprintf(stream, "%*str:\n", indent, ""); + segment_dump(stream, &sregs->tr, indent + 2); + fprintf(stream, "%*sldt:\n", indent, ""); + segment_dump(stream, &sregs->ldt, indent + 2); + + fprintf(stream, "%*sgdt:\n", indent, ""); + dtable_dump(stream, &sregs->gdt, indent + 2); + fprintf(stream, "%*sidt:\n", indent, ""); + dtable_dump(stream, &sregs->idt, indent + 2); + + fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx " + "cr3: 0x%.16llx cr4: 0x%.16llx\n", + indent, "", + sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4); + fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx " + "apic_base: 0x%.16llx\n", + indent, "", + sregs->cr8, sregs->efer, sregs->apic_base); + + fprintf(stream, "%*sinterrupt_bitmap:\n", indent, ""); + for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) { + fprintf(stream, "%*s%.16llx\n", indent + 2, "", + sregs->interrupt_bitmap[i]); + } +} + +void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) +{ + int rc; + + TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + /* If needed, create page map l4 table. */ + if (!vm->pgd_created) { + vm_paddr_t paddr = vm_phy_page_alloc(vm, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); + vm->pgd = paddr; + + /* Set pointer to pgd tables in all the VCPUs that + * have already been created. Future VCPUs will have + * the value set as each one is created. + */ + for (struct vcpu *vcpu = vm->vcpu_head; vcpu; + vcpu = vcpu->next) { + struct kvm_sregs sregs; + + /* Obtain the current system register settings */ + vcpu_sregs_get(vm, vcpu->id, &sregs); + + /* Set and store the pointer to the start of the + * pgd tables. + */ + sregs.cr3 = vm->pgd; + vcpu_sregs_set(vm, vcpu->id, &sregs); + } + + vm->pgd_created = true; + } +} + +/* VM Virtual Page Map + * + * Input Args: + * vm - Virtual Machine + * vaddr - VM Virtual Address + * paddr - VM Physical Address + * pgd_memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within the VM given by vm, creates a virtual translation for the page + * starting at vaddr to the page starting at paddr. + */ +void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + uint32_t pgd_memslot) +{ + uint16_t index[4]; + struct pageMapL4Entry *pml4e; + + TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + TEST_ASSERT((vaddr % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + " vaddr: 0x%lx vm->page_size: 0x%x", + vaddr, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, + (vaddr >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", + vaddr); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", + paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + index[0] = (vaddr >> 12) & 0x1ffu; + index[1] = (vaddr >> 21) & 0x1ffu; + index[2] = (vaddr >> 30) & 0x1ffu; + index[3] = (vaddr >> 39) & 0x1ffu; + + /* Allocate page directory pointer table if not present. */ + pml4e = addr_gpa2hva(vm, vm->pgd); + if (!pml4e[index[3]].present) { + pml4e[index[3]].address = vm_phy_page_alloc(vm, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) + >> vm->page_shift; + pml4e[index[3]].writable = true; + pml4e[index[3]].present = true; + } + + /* Allocate page directory table if not present. */ + struct pageDirectoryPointerEntry *pdpe; + pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); + if (!pdpe[index[2]].present) { + pdpe[index[2]].address = vm_phy_page_alloc(vm, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) + >> vm->page_shift; + pdpe[index[2]].writable = true; + pdpe[index[2]].present = true; + } + + /* Allocate page table if not present. */ + struct pageDirectoryEntry *pde; + pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); + if (!pde[index[1]].present) { + pde[index[1]].address = vm_phy_page_alloc(vm, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) + >> vm->page_shift; + pde[index[1]].writable = true; + pde[index[1]].present = true; + } + + /* Fill in page table entry. */ + struct pageTableEntry *pte; + pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); + pte[index[0]].address = paddr >> vm->page_shift; + pte[index[0]].writable = true; + pte[index[0]].present = 1; +} + +/* Virtual Translation Tables Dump + * + * Input Args: + * vm - Virtual Machine + * indent - Left margin indent amount + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps to the FILE stream given by stream, the contents of all the + * virtual translation tables for the VM given by vm. + */ +void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + struct pageMapL4Entry *pml4e, *pml4e_start; + struct pageDirectoryPointerEntry *pdpe, *pdpe_start; + struct pageDirectoryEntry *pde, *pde_start; + struct pageTableEntry *pte, *pte_start; + + if (!vm->pgd_created) + return; + + fprintf(stream, "%*s " + " no\n", indent, ""); + fprintf(stream, "%*s index hvaddr gpaddr " + "addr w exec dirty\n", + indent, ""); + pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm, + vm->pgd); + for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { + pml4e = &pml4e_start[n1]; + if (!pml4e->present) + continue; + fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u " + " %u\n", + indent, "", + pml4e - pml4e_start, pml4e, + addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address, + pml4e->writable, pml4e->execute_disable); + + pdpe_start = addr_gpa2hva(vm, pml4e->address + * vm->page_size); + for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { + pdpe = &pdpe_start[n2]; + if (!pdpe->present) + continue; + fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx " + "%u %u\n", + indent, "", + pdpe - pdpe_start, pdpe, + addr_hva2gpa(vm, pdpe), + (uint64_t) pdpe->address, pdpe->writable, + pdpe->execute_disable); + + pde_start = addr_gpa2hva(vm, + pdpe->address * vm->page_size); + for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { + pde = &pde_start[n3]; + if (!pde->present) + continue; + fprintf(stream, "%*spde 0x%-3zx %p " + "0x%-12lx 0x%-10lx %u %u\n", + indent, "", pde - pde_start, pde, + addr_hva2gpa(vm, pde), + (uint64_t) pde->address, pde->writable, + pde->execute_disable); + + pte_start = addr_gpa2hva(vm, + pde->address * vm->page_size); + for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { + pte = &pte_start[n4]; + if (!pte->present) + continue; + fprintf(stream, "%*spte 0x%-3zx %p " + "0x%-12lx 0x%-10lx %u %u " + " %u 0x%-10lx\n", + indent, "", + pte - pte_start, pte, + addr_hva2gpa(vm, pte), + (uint64_t) pte->address, + pte->writable, + pte->execute_disable, + pte->dirty, + ((uint64_t) n1 << 27) + | ((uint64_t) n2 << 18) + | ((uint64_t) n3 << 9) + | ((uint64_t) n4)); + } + } + } + } +} + +/* Set Unusable Segment + * + * Input Args: None + * + * Output Args: + * segp - Pointer to segment register + * + * Return: None + * + * Sets the segment register pointed to by segp to an unusable state. + */ +static void kvm_seg_set_unusable(struct kvm_segment *segp) +{ + memset(segp, 0, sizeof(*segp)); + segp->unusable = true; +} + +/* Set Long Mode Flat Kernel Code Segment + * + * Input Args: + * selector - selector value + * + * Output Args: + * segp - Pointer to KVM segment + * + * Return: None + * + * Sets up the KVM segment pointed to by segp, to be a code segment + * with the selector value given by selector. + */ +static void kvm_seg_set_kernel_code_64bit(uint16_t selector, + struct kvm_segment *segp) +{ + memset(segp, 0, sizeof(*segp)); + segp->selector = selector; + segp->limit = 0xFFFFFFFFu; + segp->s = 0x1; /* kTypeCodeData */ + segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed + * | kFlagCodeReadable + */ + segp->g = true; + segp->l = true; + segp->present = 1; +} + +/* Set Long Mode Flat Kernel Data Segment + * + * Input Args: + * selector - selector value + * + * Output Args: + * segp - Pointer to KVM segment + * + * Return: None + * + * Sets up the KVM segment pointed to by segp, to be a data segment + * with the selector value given by selector. + */ +static void kvm_seg_set_kernel_data_64bit(uint16_t selector, + struct kvm_segment *segp) +{ + memset(segp, 0, sizeof(*segp)); + segp->selector = selector; + segp->limit = 0xFFFFFFFFu; + segp->s = 0x1; /* kTypeCodeData */ + segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed + * | kFlagDataWritable + */ + segp->g = true; + segp->present = true; +} + +/* Address Guest Virtual to Guest Physical + * + * Input Args: + * vm - Virtual Machine + * gpa - VM virtual address + * + * Output Args: None + * + * Return: + * Equivalent VM physical address + * + * Translates the VM virtual address given by gva to a VM physical + * address and then locates the memory region containing the VM + * physical address, within the VM given by vm. When found, the host + * virtual address providing the memory to the vm physical address is returned. + * A TEST_ASSERT failure occurs if no region containing translated + * VM virtual address exists. + */ +vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint16_t index[4]; + struct pageMapL4Entry *pml4e; + struct pageDirectoryPointerEntry *pdpe; + struct pageDirectoryEntry *pde; + struct pageTableEntry *pte; + void *hva; + + TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + index[0] = (gva >> 12) & 0x1ffu; + index[1] = (gva >> 21) & 0x1ffu; + index[2] = (gva >> 30) & 0x1ffu; + index[3] = (gva >> 39) & 0x1ffu; + + if (!vm->pgd_created) + goto unmapped_gva; + pml4e = addr_gpa2hva(vm, vm->pgd); + if (!pml4e[index[3]].present) + goto unmapped_gva; + + pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); + if (!pdpe[index[2]].present) + goto unmapped_gva; + + pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); + if (!pde[index[1]].present) + goto unmapped_gva; + + pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); + if (!pte[index[0]].present) + goto unmapped_gva; + + return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu); + +unmapped_gva: + TEST_ASSERT(false, "No mapping for vm virtual address, " + "gva: 0x%lx", gva); +} + +void vcpu_setup(struct kvm_vm *vm, int vcpuid) +{ + struct kvm_sregs sregs; + + /* Set mode specific system register values. */ + vcpu_sregs_get(vm, vcpuid, &sregs); + + switch (vm->mode) { + case VM_MODE_FLAT48PG: + sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; + sregs.cr4 |= X86_CR4_PAE; + sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); + + kvm_seg_set_unusable(&sregs.ldt); + kvm_seg_set_kernel_code_64bit(0x8, &sregs.cs); + kvm_seg_set_kernel_data_64bit(0x10, &sregs.ds); + kvm_seg_set_kernel_data_64bit(0x10, &sregs.es); + break; + + default: + TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode); + } + vcpu_sregs_set(vm, vcpuid, &sregs); + + /* If virtual translation table have been setup, set system register + * to point to the tables. It's okay if they haven't been setup yet, + * in that the code that sets up the virtual translation tables, will + * go back through any VCPUs that have already been created and set + * their values. + */ + if (vm->pgd_created) { + struct kvm_sregs sregs; + + vcpu_sregs_get(vm, vcpuid, &sregs); + + sregs.cr3 = vm->pgd; + vcpu_sregs_set(vm, vcpuid, &sregs); + } +} +/* Adds a vCPU with reasonable defaults (i.e., a stack) + * + * Input Args: + * vcpuid - The id of the VCPU to add to the VM. + * guest_code - The vCPU's entry point + */ +void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) +{ + struct kvm_mp_state mp_state; + struct kvm_regs regs; + vm_vaddr_t stack_vaddr; + stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), + DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0); + + /* Create VCPU */ + vm_vcpu_add(vm, vcpuid); + + /* Setup guest general purpose registers */ + vcpu_regs_get(vm, vcpuid, ®s); + regs.rflags = regs.rflags | 0x2; + regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()); + regs.rip = (unsigned long) guest_code; + vcpu_regs_set(vm, vcpuid, ®s); + + /* Setup the MP state */ + mp_state.mp_state = 0; + vcpu_set_mp_state(vm, vcpuid, &mp_state); +} + +/* VM VCPU CPUID Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU id + * cpuid - The CPUID values to set. + * + * Output Args: None + * + * Return: void + * + * Set the VCPU's CPUID. + */ +void vcpu_set_cpuid(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_cpuid2 *cpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int rc; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid); + TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i", + rc, errno); + +} +/* Create a VM with reasonable defaults + * + * Input Args: + * vcpuid - The id of the single VCPU to add to the VM. + * guest_code - The vCPU's entry point + * + * Output Args: None + * + * Return: + * Pointer to opaque structure that describes the created VM. + */ +struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code) +{ + struct kvm_vm *vm; + + /* Create VM */ + vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + + /* Setup guest code */ + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + + /* Setup IRQ Chip */ + vm_create_irqchip(vm); + + /* Add the first vCPU. */ + vm_vcpu_add_default(vm, vcpuid, guest_code); + + return vm; +} diff --git a/tools/testing/selftests/kvm/set_sregs_test.c b/tools/testing/selftests/kvm/set_sregs_test.c new file mode 100644 index 000000000000..090fd3f19352 --- /dev/null +++ b/tools/testing/selftests/kvm/set_sregs_test.c @@ -0,0 +1,54 @@ +/* + * KVM_SET_SREGS tests + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * This is a regression test for the bug fixed by the following commit: + * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values") + * + * That bug allowed a user-mode program that called the KVM_SET_SREGS + * ioctl to put a VCPU's local APIC into an invalid state. + * + */ +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" + +#include "kvm_util.h" +#include "x86.h" + +#define VCPU_ID 5 + +int main(int argc, char *argv[]) +{ + struct kvm_sregs sregs; + struct kvm_vm *vm; + int rc; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, NULL); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + sregs.apic_base = 1 << 10; + rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); + TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)", + sregs.apic_base); + sregs.apic_base = 1 << 11; + rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); + TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)", + sregs.apic_base); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/sync_regs_test.c new file mode 100644 index 000000000000..428e9473f5e2 --- /dev/null +++ b/tools/testing/selftests/kvm/sync_regs_test.c @@ -0,0 +1,232 @@ +/* + * Test for x86 KVM_CAP_SYNC_REGS + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality, + * including requesting an invalid register set, updates to/from values + * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "x86.h" + +#define VCPU_ID 5 +#define PORT_HOST_SYNC 0x1000 + +static void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1) +{ + __asm__ __volatile__("in %[port], %%al" + : + : [port]"d"(port), "D"(arg0), "S"(arg1) + : "rax"); +} + +#define exit_to_l0(_port, _arg0, _arg1) \ + __exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1)) + +#define GUEST_ASSERT(_condition) do { \ + if (!(_condition)) \ + exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition, 0);\ +} while (0) + +void guest_code(void) +{ + for (;;) { + exit_to_l0(PORT_HOST_SYNC, "hello", 0); + asm volatile ("inc %r11"); + } +} + +static void compare_regs(struct kvm_regs *left, struct kvm_regs *right) +{ +#define REG_COMPARE(reg) \ + TEST_ASSERT(left->reg == right->reg, \ + "Register " #reg \ + " values did not match: 0x%llx, 0x%llx\n", \ + left->reg, right->reg) + REG_COMPARE(rax); + REG_COMPARE(rbx); + REG_COMPARE(rcx); + REG_COMPARE(rdx); + REG_COMPARE(rsi); + REG_COMPARE(rdi); + REG_COMPARE(rsp); + REG_COMPARE(rbp); + REG_COMPARE(r8); + REG_COMPARE(r9); + REG_COMPARE(r10); + REG_COMPARE(r11); + REG_COMPARE(r12); + REG_COMPARE(r13); + REG_COMPARE(r14); + REG_COMPARE(r15); + REG_COMPARE(rip); + REG_COMPARE(rflags); +#undef REG_COMPARE +} + +static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right) +{ +} + +static void compare_vcpu_events(struct kvm_vcpu_events *left, + struct kvm_vcpu_events *right) +{ +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_run *run; + struct kvm_regs regs; + struct kvm_sregs sregs; + struct kvm_vcpu_events events; + int rv, cap; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + cap = kvm_check_cap(KVM_CAP_SYNC_REGS); + TEST_ASSERT((unsigned long)cap == KVM_SYNC_X86_VALID_FIELDS, + "KVM_CAP_SYNC_REGS (0x%x) != KVM_SYNC_X86_VALID_FIELDS (0x%lx)\n", + cap, KVM_SYNC_X86_VALID_FIELDS); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, guest_code); + + run = vcpu_state(vm, VCPU_ID); + + /* Request reading invalid register set from VCPU. */ + run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS << 1; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; + + /* Request setting invalid register set into VCPU. */ + run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS << 1; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; + + /* Request and verify all valid register sets. */ + /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */ + run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + vcpu_regs_get(vm, VCPU_ID, ®s); + compare_regs(®s, &run->s.regs.regs); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + compare_sregs(&sregs, &run->s.regs.sregs); + + vcpu_events_get(vm, VCPU_ID, &events); + compare_vcpu_events(&events, &run->s.regs.events); + + /* Set and verify various register values. */ + run->s.regs.regs.r11 = 0xBAD1DEA; + run->s.regs.sregs.apic_base = 1 << 11; + /* TODO run->s.regs.events.XYZ = ABC; */ + + run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->s.regs.regs.r11 == 0xBAD1DEA + 1, + "r11 sync regs value incorrect 0x%llx.", + run->s.regs.regs.r11); + TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11, + "apic_base sync regs value incorrect 0x%llx.", + run->s.regs.sregs.apic_base); + + vcpu_regs_get(vm, VCPU_ID, ®s); + compare_regs(®s, &run->s.regs.regs); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + compare_sregs(&sregs, &run->s.regs.sregs); + + vcpu_events_get(vm, VCPU_ID, &events); + compare_vcpu_events(&events, &run->s.regs.events); + + /* Clear kvm_dirty_regs bits, verify new s.regs values are + * overwritten with existing guest values. + */ + run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_dirty_regs = 0; + run->s.regs.regs.r11 = 0xDEADBEEF; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->s.regs.regs.r11 != 0xDEADBEEF, + "r11 sync regs value incorrect 0x%llx.", + run->s.regs.regs.r11); + + /* Clear kvm_valid_regs bits and kvm_dirty_bits. + * Verify s.regs values are not overwritten with existing guest values + * and that guest values are not overwritten with kvm_sync_regs values. + */ + run->kvm_valid_regs = 0; + run->kvm_dirty_regs = 0; + run->s.regs.regs.r11 = 0xAAAA; + regs.r11 = 0xBAC0; + vcpu_regs_set(vm, VCPU_ID, ®s); + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->s.regs.regs.r11 == 0xAAAA, + "r11 sync regs value incorrect 0x%llx.", + run->s.regs.regs.r11); + vcpu_regs_get(vm, VCPU_ID, ®s); + TEST_ASSERT(regs.r11 == 0xBAC0 + 1, + "r11 guest value incorrect 0x%llx.", + regs.r11); + + /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten + * with existing guest values but that guest values are overwritten + * with kvm_sync_regs values. + */ + run->kvm_valid_regs = 0; + run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS; + run->s.regs.regs.r11 = 0xBBBB; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->s.regs.regs.r11 == 0xBBBB, + "r11 sync regs value incorrect 0x%llx.", + run->s.regs.regs.r11); + vcpu_regs_get(vm, VCPU_ID, ®s); + TEST_ASSERT(regs.r11 == 0xBBBB + 1, + "r11 guest value incorrect 0x%llx.", + regs.r11); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 5bef05d6ba39..195e9d4739a9 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -20,6 +20,7 @@ all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) .ONESHELL: define RUN_TESTS + @export KSFT_TAP_LEVEL=`echo 1`; @test_num=`echo 0`; @echo "TAP version 13"; @for TEST in $(1); do \ @@ -77,7 +78,7 @@ endif define EMIT_TESTS @for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \ BASENAME_TEST=`basename $$TEST`; \ - echo "(./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \ + echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \ done; endef diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile index be5bd4ffb895..c82cec2497de 100644 --- a/tools/testing/selftests/media_tests/Makefile +++ b/tools/testing/selftests/media_tests/Makefile @@ -1,8 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := media_device_test media_device_open video_device_test -all: $(TEST_PROGS) +TEST_GEN_PROGS := media_device_test media_device_open video_device_test +all: $(TEST_GEN_PROGS) include ../lib.mk - -clean: - rm -fr media_device_test media_device_open video_device_test diff --git a/tools/testing/selftests/media_tests/media_device_open.c b/tools/testing/selftests/media_tests/media_device_open.c index 44343c091a20..a5ce5434bafd 100644 --- a/tools/testing/selftests/media_tests/media_device_open.c +++ b/tools/testing/selftests/media_tests/media_device_open.c @@ -1,10 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 + /* * media_device_open.c - Media Controller Device Open Test * * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com> * Copyright (c) 2016 Samsung Electronics Co., Ltd. * - * This file is released under the GPLv2. */ /* diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c index 5d49943e77d0..421a367e4bb3 100644 --- a/tools/testing/selftests/media_tests/media_device_test.c +++ b/tools/testing/selftests/media_tests/media_device_test.c @@ -1,10 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 + /* * media_device_test.c - Media Controller Device ioctl loop Test * * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com> * Copyright (c) 2016 Samsung Electronics Co., Ltd. * - * This file is released under the GPLv2. */ /* diff --git a/tools/testing/selftests/media_tests/video_device_test.c b/tools/testing/selftests/media_tests/video_device_test.c index 66d419c28653..0f6aef2e2593 100644 --- a/tools/testing/selftests/media_tests/video_device_test.c +++ b/tools/testing/selftests/media_tests/video_device_test.c @@ -1,10 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 + /* * video_device_test - Video Device Test * * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com> * Copyright (c) 2016 Samsung Electronics Co., Ltd. * - * This file is released under the GPLv2. */ /* diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c index 9e674d9514d1..22bffd55a523 100644 --- a/tools/testing/selftests/membarrier/membarrier_test.c +++ b/tools/testing/selftests/membarrier/membarrier_test.c @@ -16,49 +16,210 @@ static int sys_membarrier(int cmd, int flags) static int test_membarrier_cmd_fail(void) { int cmd = -1, flags = 0; + const char *test_name = "sys membarrier invalid command"; if (sys_membarrier(cmd, flags) != -1) { ksft_exit_fail_msg( - "sys membarrier invalid command test: command = %d, flags = %d. Should fail, but passed\n", - cmd, flags); + "%s test: command = %d, flags = %d. Should fail, but passed\n", + test_name, cmd, flags); + } + if (errno != EINVAL) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n", + test_name, flags, EINVAL, strerror(EINVAL), + errno, strerror(errno)); } ksft_test_result_pass( - "sys membarrier invalid command test: command = %d, flags = %d. Failed as expected\n", - cmd, flags); + "%s test: command = %d, flags = %d, errno = %d. Failed as expected\n", + test_name, cmd, flags, errno); return 0; } static int test_membarrier_flags_fail(void) { int cmd = MEMBARRIER_CMD_QUERY, flags = 1; + const char *test_name = "sys membarrier MEMBARRIER_CMD_QUERY invalid flags"; + + if (sys_membarrier(cmd, flags) != -1) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should fail, but passed\n", + test_name, flags); + } + if (errno != EINVAL) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n", + test_name, flags, EINVAL, strerror(EINVAL), + errno, strerror(errno)); + } + + ksft_test_result_pass( + "%s test: flags = %d, errno = %d. Failed as expected\n", + test_name, flags, errno); + return 0; +} + +static int test_membarrier_global_success(void) +{ + int cmd = MEMBARRIER_CMD_GLOBAL, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + } + + ksft_test_result_pass( + "%s test: flags = %d\n", test_name, flags); + return 0; +} + +static int test_membarrier_private_expedited_fail(void) +{ + int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED not registered failure"; + + if (sys_membarrier(cmd, flags) != -1) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should fail, but passed\n", + test_name, flags); + } + if (errno != EPERM) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n", + test_name, flags, EPERM, strerror(EPERM), + errno, strerror(errno)); + } + + ksft_test_result_pass( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + return 0; +} + +static int test_membarrier_register_private_expedited_success(void) +{ + int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + } + + ksft_test_result_pass( + "%s test: flags = %d\n", + test_name, flags); + return 0; +} + +static int test_membarrier_private_expedited_success(void) +{ + int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + } + + ksft_test_result_pass( + "%s test: flags = %d\n", + test_name, flags); + return 0; +} + +static int test_membarrier_private_expedited_sync_core_fail(void) +{ + int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE not registered failure"; if (sys_membarrier(cmd, flags) != -1) { ksft_exit_fail_msg( - "sys membarrier MEMBARRIER_CMD_QUERY invalid flags test: flags = %d. Should fail, but passed\n", - flags); + "%s test: flags = %d. Should fail, but passed\n", + test_name, flags); + } + if (errno != EPERM) { + ksft_exit_fail_msg( + "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n", + test_name, flags, EPERM, strerror(EPERM), + errno, strerror(errno)); + } + + ksft_test_result_pass( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + return 0; +} + +static int test_membarrier_register_private_expedited_sync_core_success(void) +{ + int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + } + + ksft_test_result_pass( + "%s test: flags = %d\n", + test_name, flags); + return 0; +} + +static int test_membarrier_private_expedited_sync_core_success(void) +{ + int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); } ksft_test_result_pass( - "sys membarrier MEMBARRIER_CMD_QUERY invalid flags test: flags = %d. Failed as expected\n", - flags); + "%s test: flags = %d\n", + test_name, flags); return 0; } -static int test_membarrier_success(void) +static int test_membarrier_register_global_expedited_success(void) { - int cmd = MEMBARRIER_CMD_SHARED, flags = 0; - const char *test_name = "sys membarrier MEMBARRIER_CMD_SHARED\n"; + int cmd = MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED"; if (sys_membarrier(cmd, flags) != 0) { ksft_exit_fail_msg( - "sys membarrier MEMBARRIER_CMD_SHARED test: flags = %d\n", - flags); + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); } ksft_test_result_pass( - "sys membarrier MEMBARRIER_CMD_SHARED test: flags = %d\n", - flags); + "%s test: flags = %d\n", + test_name, flags); + return 0; +} + +static int test_membarrier_global_expedited_success(void) +{ + int cmd = MEMBARRIER_CMD_GLOBAL_EXPEDITED, flags = 0; + const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL_EXPEDITED"; + + if (sys_membarrier(cmd, flags) != 0) { + ksft_exit_fail_msg( + "%s test: flags = %d, errno = %d\n", + test_name, flags, errno); + } + + ksft_test_result_pass( + "%s test: flags = %d\n", + test_name, flags); return 0; } @@ -72,7 +233,45 @@ static int test_membarrier(void) status = test_membarrier_flags_fail(); if (status) return status; - status = test_membarrier_success(); + status = test_membarrier_global_success(); + if (status) + return status; + status = test_membarrier_private_expedited_fail(); + if (status) + return status; + status = test_membarrier_register_private_expedited_success(); + if (status) + return status; + status = test_membarrier_private_expedited_success(); + if (status) + return status; + status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0); + if (status < 0) { + ksft_test_result_fail("sys_membarrier() failed\n"); + return status; + } + if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) { + status = test_membarrier_private_expedited_sync_core_fail(); + if (status) + return status; + status = test_membarrier_register_private_expedited_sync_core_success(); + if (status) + return status; + status = test_membarrier_private_expedited_sync_core_success(); + if (status) + return status; + } + /* + * It is valid to send a global membarrier from a non-registered + * process. + */ + status = test_membarrier_global_expedited_success(); + if (status) + return status; + status = test_membarrier_register_global_expedited_success(); + if (status) + return status; + status = test_membarrier_global_expedited_success(); if (status) return status; return 0; @@ -94,8 +293,10 @@ static int test_membarrier_query(void) } ksft_exit_fail_msg("sys_membarrier() failed\n"); } - if (!(ret & MEMBARRIER_CMD_SHARED)) + if (!(ret & MEMBARRIER_CMD_GLOBAL)) { + ksft_test_result_fail("sys_membarrier() CMD_GLOBAL query failed\n"); ksft_exit_fail_msg("sys_membarrier is not supported.\n"); + } ksft_test_result_pass("sys_membarrier available\n"); return 0; @@ -108,5 +309,5 @@ int main(int argc, char **argv) test_membarrier_query(); test_membarrier(); - ksft_exit_pass(); + return ksft_exit_pass(); } diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile index 3926a0409dda..0862e6f47a38 100644 --- a/tools/testing/selftests/memfd/Makefile +++ b/tools/testing/selftests/memfd/Makefile @@ -5,6 +5,7 @@ CFLAGS += -I../../../../include/ CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_tests.sh +TEST_FILES := run_fuse_test.sh TEST_GEN_FILES := memfd_test fuse_mnt fuse_test fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags) @@ -12,3 +13,8 @@ fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags) include ../lib.mk $(OUTPUT)/fuse_mnt: LDLIBS += $(shell pkg-config fuse --libs) + +$(OUTPUT)/memfd_test: memfd_test.c common.o +$(OUTPUT)/fuse_test: fuse_test.c common.o + +EXTRA_CLEAN = common.o diff --git a/tools/testing/selftests/memfd/common.c b/tools/testing/selftests/memfd/common.c new file mode 100644 index 000000000000..8eb3d75f6e60 --- /dev/null +++ b/tools/testing/selftests/memfd/common.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __EXPORTED_HEADERS__ + +#include <stdio.h> +#include <stdlib.h> +#include <linux/fcntl.h> +#include <linux/memfd.h> +#include <unistd.h> +#include <sys/syscall.h> + +#include "common.h" + +int hugetlbfs_test = 0; + +/* + * Copied from mlock2-tests.c + */ +unsigned long default_huge_page_size(void) +{ + unsigned long hps = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + + if (!f) + return 0; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + + free(line); + fclose(f); + return hps; +} + +int sys_memfd_create(const char *name, unsigned int flags) +{ + if (hugetlbfs_test) + flags |= MFD_HUGETLB; + + return syscall(__NR_memfd_create, name, flags); +} diff --git a/tools/testing/selftests/memfd/common.h b/tools/testing/selftests/memfd/common.h new file mode 100644 index 000000000000..522d2c630bd8 --- /dev/null +++ b/tools/testing/selftests/memfd/common.h @@ -0,0 +1,9 @@ +#ifndef COMMON_H_ +#define COMMON_H_ + +extern int hugetlbfs_test; + +unsigned long default_huge_page_size(void); +int sys_memfd_create(const char *name, unsigned int flags); + +#endif diff --git a/tools/testing/selftests/memfd/config b/tools/testing/selftests/memfd/config new file mode 100644 index 000000000000..835c7f4dadcd --- /dev/null +++ b/tools/testing/selftests/memfd/config @@ -0,0 +1 @@ +CONFIG_FUSE_FS=m diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c index 1ccb7a3eb14b..b018e835737d 100644 --- a/tools/testing/selftests/memfd/fuse_test.c +++ b/tools/testing/selftests/memfd/fuse_test.c @@ -33,14 +33,12 @@ #include <sys/wait.h> #include <unistd.h> +#include "common.h" + #define MFD_DEF_SIZE 8192 #define STACK_SIZE 65536 -static int sys_memfd_create(const char *name, - unsigned int flags) -{ - return syscall(__NR_memfd_create, name, flags); -} +static size_t mfd_def_size = MFD_DEF_SIZE; static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags) { @@ -127,7 +125,7 @@ static void *mfd_assert_mmap_shared(int fd) void *p; p = mmap(NULL, - MFD_DEF_SIZE, + mfd_def_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, @@ -145,7 +143,7 @@ static void *mfd_assert_mmap_private(int fd) void *p; p = mmap(NULL, - MFD_DEF_SIZE, + mfd_def_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, @@ -178,7 +176,7 @@ static int sealing_thread_fn(void *arg) usleep(200000); /* unmount mapping before sealing to avoid i_mmap_writable failures */ - munmap(global_p, MFD_DEF_SIZE); + munmap(global_p, mfd_def_size); /* Try sealing the global file; expect EBUSY or success. Current * kernels will never succeed, but in the future, kernels might @@ -228,7 +226,7 @@ static void join_sealing_thread(pid_t pid) int main(int argc, char **argv) { - static const char zero[MFD_DEF_SIZE]; + char *zero; int fd, mfd, r; void *p; int was_sealed; @@ -239,6 +237,25 @@ int main(int argc, char **argv) abort(); } + if (argc >= 3) { + if (!strcmp(argv[2], "hugetlbfs")) { + unsigned long hpage_size = default_huge_page_size(); + + if (!hpage_size) { + printf("Unable to determine huge page size\n"); + abort(); + } + + hugetlbfs_test = 1; + mfd_def_size = hpage_size * 2; + } else { + printf("Unknown option: %s\n", argv[2]); + abort(); + } + } + + zero = calloc(sizeof(*zero), mfd_def_size); + /* open FUSE memfd file for GUP testing */ printf("opening: %s\n", argv[1]); fd = open(argv[1], O_RDONLY | O_CLOEXEC); @@ -249,7 +266,7 @@ int main(int argc, char **argv) /* create new memfd-object */ mfd = mfd_assert_new("kern_memfd_fuse", - MFD_DEF_SIZE, + mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); /* mmap memfd-object for writing */ @@ -268,7 +285,7 @@ int main(int argc, char **argv) * This guarantees that the receive-buffer is pinned for 1s until the * data is written into it. The racing ADD_SEALS should thus fail as * the pages are still pinned. */ - r = read(fd, p, MFD_DEF_SIZE); + r = read(fd, p, mfd_def_size); if (r < 0) { printf("read() failed: %m\n"); abort(); @@ -295,10 +312,10 @@ int main(int argc, char **argv) * enough to avoid any in-flight writes. */ p = mfd_assert_mmap_private(mfd); - if (was_sealed && memcmp(p, zero, MFD_DEF_SIZE)) { + if (was_sealed && memcmp(p, zero, mfd_def_size)) { printf("memfd sealed during read() but data not discarded\n"); abort(); - } else if (!was_sealed && !memcmp(p, zero, MFD_DEF_SIZE)) { + } else if (!was_sealed && !memcmp(p, zero, mfd_def_size)) { printf("memfd sealed after read() but data discarded\n"); abort(); } @@ -307,6 +324,7 @@ int main(int argc, char **argv) close(fd); printf("fuse: DONE\n"); + free(zero); return 0; } diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 132a54f74e88..10baa1652fc2 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -19,7 +19,10 @@ #include <sys/wait.h> #include <unistd.h> +#include "common.h" + #define MEMFD_STR "memfd:" +#define MEMFD_HUGE_STR "memfd-hugetlb:" #define SHARED_FT_STR "(shared file-table)" #define MFD_DEF_SIZE 8192 @@ -28,41 +31,8 @@ /* * Default is not to test hugetlbfs */ -static int hugetlbfs_test; static size_t mfd_def_size = MFD_DEF_SIZE; - -/* - * Copied from mlock2-tests.c - */ -static unsigned long default_huge_page_size(void) -{ - unsigned long hps = 0; - char *line = NULL; - size_t linelen = 0; - FILE *f = fopen("/proc/meminfo", "r"); - - if (!f) - return 0; - while (getline(&line, &linelen, f) > 0) { - if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { - hps <<= 10; - break; - } - } - - free(line); - fclose(f); - return hps; -} - -static int sys_memfd_create(const char *name, - unsigned int flags) -{ - if (hugetlbfs_test) - flags |= MFD_HUGETLB; - - return syscall(__NR_memfd_create, name, flags); -} +static const char *memfd_str = MEMFD_STR; static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags) { @@ -513,6 +483,10 @@ static void mfd_assert_grow_write(int fd) static char *buf; ssize_t l; + /* hugetlbfs does not support write */ + if (hugetlbfs_test) + return; + buf = malloc(mfd_def_size * 8); if (!buf) { printf("malloc(%zu) failed: %m\n", mfd_def_size * 8); @@ -533,6 +507,10 @@ static void mfd_fail_grow_write(int fd) static char *buf; ssize_t l; + /* hugetlbfs does not support write */ + if (hugetlbfs_test) + return; + buf = malloc(mfd_def_size * 8); if (!buf) { printf("malloc(%zu) failed: %m\n", mfd_def_size * 8); @@ -598,7 +576,7 @@ static void test_create(void) char buf[2048]; int fd; - printf("%s CREATE\n", MEMFD_STR); + printf("%s CREATE\n", memfd_str); /* test NULL name */ mfd_fail_new(NULL, 0); @@ -627,18 +605,13 @@ static void test_create(void) fd = mfd_assert_new("", 0, MFD_CLOEXEC); close(fd); - if (!hugetlbfs_test) { - /* verify MFD_ALLOW_SEALING is allowed */ - fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING); - close(fd); - - /* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */ - fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC); - close(fd); - } else { - /* sealing is not supported on hugetlbfs */ - mfd_fail_new("", MFD_ALLOW_SEALING); - } + /* verify MFD_ALLOW_SEALING is allowed */ + fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING); + close(fd); + + /* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */ + fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC); + close(fd); } /* @@ -649,11 +622,7 @@ static void test_basic(void) { int fd; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - - printf("%s BASIC\n", MEMFD_STR); + printf("%s BASIC\n", memfd_str); fd = mfd_assert_new("kern_memfd_basic", mfd_def_size, @@ -698,28 +667,6 @@ static void test_basic(void) } /* - * hugetlbfs doesn't support seals or write, so just verify grow and shrink - * on a hugetlbfs file created via memfd_create. - */ -static void test_hugetlbfs_grow_shrink(void) -{ - int fd; - - printf("%s HUGETLBFS-GROW-SHRINK\n", MEMFD_STR); - - fd = mfd_assert_new("kern_memfd_seal_write", - mfd_def_size, - MFD_CLOEXEC); - - mfd_assert_read(fd); - mfd_assert_write(fd); - mfd_assert_shrink(fd); - mfd_assert_grow(fd); - - close(fd); -} - -/* * Test SEAL_WRITE * Test whether SEAL_WRITE actually prevents modifications. */ @@ -727,14 +674,7 @@ static void test_seal_write(void) { int fd; - /* - * hugetlbfs does not contain sealing or write support. Just test - * basic grow and shrink via test_hugetlbfs_grow_shrink. - */ - if (hugetlbfs_test) - return test_hugetlbfs_grow_shrink(); - - printf("%s SEAL-WRITE\n", MEMFD_STR); + printf("%s SEAL-WRITE\n", memfd_str); fd = mfd_assert_new("kern_memfd_seal_write", mfd_def_size, @@ -760,11 +700,7 @@ static void test_seal_shrink(void) { int fd; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - - printf("%s SEAL-SHRINK\n", MEMFD_STR); + printf("%s SEAL-SHRINK\n", memfd_str); fd = mfd_assert_new("kern_memfd_seal_shrink", mfd_def_size, @@ -790,11 +726,7 @@ static void test_seal_grow(void) { int fd; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - - printf("%s SEAL-GROW\n", MEMFD_STR); + printf("%s SEAL-GROW\n", memfd_str); fd = mfd_assert_new("kern_memfd_seal_grow", mfd_def_size, @@ -820,11 +752,7 @@ static void test_seal_resize(void) { int fd; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - - printf("%s SEAL-RESIZE\n", MEMFD_STR); + printf("%s SEAL-RESIZE\n", memfd_str); fd = mfd_assert_new("kern_memfd_seal_resize", mfd_def_size, @@ -843,32 +771,6 @@ static void test_seal_resize(void) } /* - * hugetlbfs does not support seals. Basic test to dup the memfd created - * fd and perform some basic operations on it. - */ -static void hugetlbfs_dup(char *b_suffix) -{ - int fd, fd2; - - printf("%s HUGETLBFS-DUP %s\n", MEMFD_STR, b_suffix); - - fd = mfd_assert_new("kern_memfd_share_dup", - mfd_def_size, - MFD_CLOEXEC); - - fd2 = mfd_assert_dup(fd); - - mfd_assert_read(fd); - mfd_assert_write(fd); - - mfd_assert_shrink(fd2); - mfd_assert_grow(fd2); - - close(fd2); - close(fd); -} - -/* * Test sharing via dup() * Test that seals are shared between dupped FDs and they're all equal. */ @@ -876,16 +778,7 @@ static void test_share_dup(char *banner, char *b_suffix) { int fd, fd2; - /* - * hugetlbfs does not contain sealing support. Perform some - * basic testing on dup'ed fd instead via hugetlbfs_dup. - */ - if (hugetlbfs_test) { - hugetlbfs_dup(b_suffix); - return; - } - - printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); + printf("%s %s %s\n", memfd_str, banner, b_suffix); fd = mfd_assert_new("kern_memfd_share_dup", mfd_def_size, @@ -927,11 +820,7 @@ static void test_share_mmap(char *banner, char *b_suffix) int fd; void *p; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - - printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); + printf("%s %s %s\n", memfd_str, banner, b_suffix); fd = mfd_assert_new("kern_memfd_share_mmap", mfd_def_size, @@ -956,32 +845,6 @@ static void test_share_mmap(char *banner, char *b_suffix) } /* - * Basic test to make sure we can open the hugetlbfs fd via /proc and - * perform some simple operations on it. - */ -static void hugetlbfs_proc_open(char *b_suffix) -{ - int fd, fd2; - - printf("%s HUGETLBFS-PROC-OPEN %s\n", MEMFD_STR, b_suffix); - - fd = mfd_assert_new("kern_memfd_share_open", - mfd_def_size, - MFD_CLOEXEC); - - fd2 = mfd_assert_open(fd, O_RDWR, 0); - - mfd_assert_read(fd); - mfd_assert_write(fd); - - mfd_assert_shrink(fd2); - mfd_assert_grow(fd2); - - close(fd2); - close(fd); -} - -/* * Test sealing with open(/proc/self/fd/%d) * Via /proc we can get access to a separate file-context for the same memfd. * This is *not* like dup(), but like a real separate open(). Make sure the @@ -991,16 +854,7 @@ static void test_share_open(char *banner, char *b_suffix) { int fd, fd2; - /* - * hugetlbfs does not contain sealing support. So test basic - * functionality of using /proc fd via hugetlbfs_proc_open - */ - if (hugetlbfs_test) { - hugetlbfs_proc_open(b_suffix); - return; - } - - printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); + printf("%s %s %s\n", memfd_str, banner, b_suffix); fd = mfd_assert_new("kern_memfd_share_open", mfd_def_size, @@ -1043,11 +897,7 @@ static void test_share_fork(char *banner, char *b_suffix) int fd; pid_t pid; - /* hugetlbfs does not contain sealing support */ - if (hugetlbfs_test) - return; - - printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); + printf("%s %s %s\n", memfd_str, banner, b_suffix); fd = mfd_assert_new("kern_memfd_share_fork", mfd_def_size, @@ -1083,7 +933,11 @@ int main(int argc, char **argv) } hugetlbfs_test = 1; + memfd_str = MEMFD_HUGE_STR; mfd_def_size = hpage_size * 2; + } else { + printf("Unknown option: %s\n", argv[1]); + abort(); } } diff --git a/tools/testing/selftests/memfd/run_fuse_test.sh b/tools/testing/selftests/memfd/run_fuse_test.sh index 407df68dfe27..22e572e2d66a 100755 --- a/tools/testing/selftests/memfd/run_fuse_test.sh +++ b/tools/testing/selftests/memfd/run_fuse_test.sh @@ -10,6 +10,6 @@ set -e mkdir mnt ./fuse_mnt ./mnt -./fuse_test ./mnt/memfd +./fuse_test ./mnt/memfd $@ fusermount -u ./mnt rmdir ./mnt diff --git a/tools/testing/selftests/memfd/run_tests.sh b/tools/testing/selftests/memfd/run_tests.sh index daabb350697c..c2d41ed81b24 100755 --- a/tools/testing/selftests/memfd/run_tests.sh +++ b/tools/testing/selftests/memfd/run_tests.sh @@ -60,6 +60,7 @@ fi # Run the hugetlbfs test # ./memfd_test hugetlbfs +./run_fuse_test.sh hugetlbfs # # Give back any huge pages allocated for the test diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile index 86636d207adf..686da510f989 100644 --- a/tools/testing/selftests/memory-hotplug/Makefile +++ b/tools/testing/selftests/memory-hotplug/Makefile @@ -4,8 +4,9 @@ all: include ../lib.mk TEST_PROGS := mem-on-off-test.sh -override RUN_TESTS := ./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]" -override EMIT_TESTS := echo "$(RUN_TESTS)" +override RUN_TESTS := @./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]" + +override EMIT_TESTS := echo "$(subst @,,$(RUN_TESTS))" run_full_test: @/bin/bash ./mem-on-off-test.sh && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]" diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 500c74db746c..785fc18a16b4 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -5,6 +5,7 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh +TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 7177bea1fdfa..6a75a3ea44ad 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -2,3 +2,8 @@ CONFIG_USER_NS=y CONFIG_BPF_SYSCALL=y CONFIG_TEST_BPF=m CONFIG_NUMA=y +CONFIG_NET_VRF=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_IPV6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_VETH=y diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh new file mode 100755 index 000000000000..3991ad1a368d --- /dev/null +++ b/tools/testing/selftests/net/fib-onlink-tests.sh @@ -0,0 +1,467 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# IPv4 and IPv6 onlink tests + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +# Network interfaces +# - odd in current namespace; even in peer ns +declare -A NETIFS +# default VRF +NETIFS[p1]=veth1 +NETIFS[p2]=veth2 +NETIFS[p3]=veth3 +NETIFS[p4]=veth4 +# VRF +NETIFS[p5]=veth5 +NETIFS[p6]=veth6 +NETIFS[p7]=veth7 +NETIFS[p8]=veth8 + +# /24 network +declare -A V4ADDRS +V4ADDRS[p1]=169.254.1.1 +V4ADDRS[p2]=169.254.1.2 +V4ADDRS[p3]=169.254.3.1 +V4ADDRS[p4]=169.254.3.2 +V4ADDRS[p5]=169.254.5.1 +V4ADDRS[p6]=169.254.5.2 +V4ADDRS[p7]=169.254.7.1 +V4ADDRS[p8]=169.254.7.2 + +# /64 network +declare -A V6ADDRS +V6ADDRS[p1]=2001:db8:101::1 +V6ADDRS[p2]=2001:db8:101::2 +V6ADDRS[p3]=2001:db8:301::1 +V6ADDRS[p4]=2001:db8:301::2 +V6ADDRS[p5]=2001:db8:501::1 +V6ADDRS[p6]=2001:db8:501::2 +V6ADDRS[p7]=2001:db8:701::1 +V6ADDRS[p8]=2001:db8:701::2 + +# Test networks: +# [1] = default table +# [2] = VRF +# +# /32 host routes +declare -A TEST_NET4 +TEST_NET4[1]=169.254.101 +TEST_NET4[2]=169.254.102 +# /128 host routes +declare -A TEST_NET6 +TEST_NET6[1]=2001:db8:101 +TEST_NET6[2]=2001:db8:102 + +# connected gateway +CONGW[1]=169.254.1.254 +CONGW[2]=169.254.3.254 +CONGW[3]=169.254.5.254 + +# recursive gateway +RECGW4[1]=169.254.11.254 +RECGW4[2]=169.254.12.254 +RECGW6[1]=2001:db8:11::64 +RECGW6[2]=2001:db8:12::64 + +# for v4 mapped to v6 +declare -A TEST_NET4IN6IN6 +TEST_NET4IN6[1]=10.1.1.254 +TEST_NET4IN6[2]=10.2.1.254 + +# mcast address +MCAST6=ff02::1 + + +PEER_NS=bart +PEER_CMD="ip netns exec ${PEER_NS}" +VRF=lisa +VRF_TABLE=1101 +PBR_TABLE=101 + +################################################################################ +# utilities + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-50s [ OK ]\n" "${msg}" + else + nfail=$((nfail+1)) + printf "\n TEST: %-50s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +log_section() +{ + echo + echo "######################################################################" + echo "TEST SECTION: $*" + echo "######################################################################" +} + +log_subsection() +{ + echo + echo "#########################################" + echo "TEST SUBSECTION: $*" +} + +run_cmd() +{ + echo + echo "COMMAND: $*" + eval $* +} + +get_linklocal() +{ + local dev=$1 + local pfx + local addr + + addr=$(${pfx} ip -6 -br addr show dev ${dev} | \ + awk '{ + for (i = 3; i <= NF; ++i) { + if ($i ~ /^fe80/) + print $i + } + }' + ) + addr=${addr/\/*} + + [ -z "$addr" ] && return 1 + + echo $addr + + return 0 +} + +################################################################################ +# + +setup() +{ + echo + echo "########################################" + echo "Configuring interfaces" + + set -e + + # create namespace + ip netns add ${PEER_NS} + ip -netns ${PEER_NS} li set lo up + + # add vrf table + ip li add ${VRF} type vrf table ${VRF_TABLE} + ip li set ${VRF} up + ip ro add table ${VRF_TABLE} unreachable default + ip -6 ro add table ${VRF_TABLE} unreachable default + + # create test interfaces + ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]} + ip li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]} + ip li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]} + ip li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]} + + # enslave vrf interfaces + for n in 5 7; do + ip li set ${NETIFS[p${n}]} vrf ${VRF} + done + + # add addresses + for n in 1 3 5 7; do + ip li set ${NETIFS[p${n}]} up + ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} + ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} + done + + # move peer interfaces to namespace and add addresses + for n in 2 4 6 8; do + ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up + ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} + ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} + done + + set +e + + # let DAD complete - assume default of 1 probe + sleep 1 +} + +cleanup() +{ + # make sure we start from a clean slate + ip netns del ${PEER_NS} 2>/dev/null + for n in 1 3 5 7; do + ip link del ${NETIFS[p${n}]} 2>/dev/null + done + ip link del ${VRF} 2>/dev/null + ip ro flush table ${VRF_TABLE} + ip -6 ro flush table ${VRF_TABLE} +} + +################################################################################ +# IPv4 tests +# + +run_ip() +{ + local table="$1" + local prefix="$2" + local gw="$3" + local dev="$4" + local exp_rc="$5" + local desc="$6" + + # dev arg may be empty + [ -n "${dev}" ] && dev="dev ${dev}" + + run_cmd ip ro add table "${table}" "${prefix}"/32 via "${gw}" "${dev}" onlink + log_test $? ${exp_rc} "${desc}" +} + +run_ip_mpath() +{ + local table="$1" + local prefix="$2" + local nh1="$3" + local nh2="$4" + local exp_rc="$5" + local desc="$6" + + # dev arg may be empty + [ -n "${dev}" ] && dev="dev ${dev}" + + run_cmd ip ro add table "${table}" "${prefix}"/32 \ + nexthop via ${nh1} nexthop via ${nh2} + log_test $? ${exp_rc} "${desc}" +} + +valid_onlink_ipv4() +{ + # - unicast connected, unicast recursive + # + log_subsection "default VRF - main table" + + run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected" + run_ip 254 ${TEST_NET4[1]}.2 ${RECGW4[1]} ${NETIFS[p1]} 0 "unicast recursive" + + log_subsection "VRF ${VRF}" + + run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected" + run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive" + + log_subsection "VRF device, PBR table" + + run_ip ${PBR_TABLE} ${TEST_NET4[2]}.3 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected" + run_ip ${PBR_TABLE} ${TEST_NET4[2]}.4 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive" + + # multipath version + # + log_subsection "default VRF - main table - multipath" + + run_ip_mpath 254 ${TEST_NET4[1]}.5 \ + "${CONGW[1]} dev ${NETIFS[p1]} onlink" \ + "${CONGW[2]} dev ${NETIFS[p3]} onlink" \ + 0 "unicast connected - multipath" + + run_ip_mpath 254 ${TEST_NET4[1]}.6 \ + "${RECGW4[1]} dev ${NETIFS[p1]} onlink" \ + "${RECGW4[2]} dev ${NETIFS[p3]} onlink" \ + 0 "unicast recursive - multipath" + + run_ip_mpath 254 ${TEST_NET4[1]}.7 \ + "${CONGW[1]} dev ${NETIFS[p1]}" \ + "${CONGW[2]} dev ${NETIFS[p3]} onlink" \ + 0 "unicast connected - multipath onlink first only" + + run_ip_mpath 254 ${TEST_NET4[1]}.8 \ + "${CONGW[1]} dev ${NETIFS[p1]} onlink" \ + "${CONGW[2]} dev ${NETIFS[p3]}" \ + 0 "unicast connected - multipath onlink second only" +} + +invalid_onlink_ipv4() +{ + run_ip 254 ${TEST_NET4[1]}.11 ${V4ADDRS[p1]} ${NETIFS[p1]} 2 \ + "Invalid gw - local unicast address" + + run_ip ${VRF_TABLE} ${TEST_NET4[2]}.11 ${V4ADDRS[p5]} ${NETIFS[p5]} 2 \ + "Invalid gw - local unicast address, VRF" + + run_ip 254 ${TEST_NET4[1]}.101 ${V4ADDRS[p1]} "" 2 "No nexthop device given" + + run_ip 254 ${TEST_NET4[1]}.102 ${V4ADDRS[p3]} ${NETIFS[p1]} 2 \ + "Gateway resolves to wrong nexthop device" + + run_ip ${VRF_TABLE} ${TEST_NET4[2]}.103 ${V4ADDRS[p7]} ${NETIFS[p5]} 2 \ + "Gateway resolves to wrong nexthop device - VRF" +} + +################################################################################ +# IPv6 tests +# + +run_ip6() +{ + local table="$1" + local prefix="$2" + local gw="$3" + local dev="$4" + local exp_rc="$5" + local desc="$6" + + # dev arg may be empty + [ -n "${dev}" ] && dev="dev ${dev}" + + run_cmd ip -6 ro add table "${table}" "${prefix}"/128 via "${gw}" "${dev}" onlink + log_test $? ${exp_rc} "${desc}" +} + +run_ip6_mpath() +{ + local table="$1" + local prefix="$2" + local opts="$3" + local nh1="$4" + local nh2="$5" + local exp_rc="$6" + local desc="$7" + + run_cmd ip -6 ro add table "${table}" "${prefix}"/128 "${opts}" \ + nexthop via ${nh1} nexthop via ${nh2} + log_test $? ${exp_rc} "${desc}" +} + +valid_onlink_ipv6() +{ + # - unicast connected, unicast recursive, v4-mapped + # + log_subsection "default VRF - main table" + + run_ip6 254 ${TEST_NET6[1]}::1 ${V6ADDRS[p1]/::*}::64 ${NETIFS[p1]} 0 "unicast connected" + run_ip6 254 ${TEST_NET6[1]}::2 ${RECGW6[1]} ${NETIFS[p1]} 0 "unicast recursive" + run_ip6 254 ${TEST_NET6[1]}::3 ::ffff:${TEST_NET4IN6[1]} ${NETIFS[p1]} 0 "v4-mapped" + + log_subsection "VRF ${VRF}" + + run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::1 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected" + run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::2 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive" + run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::3 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped" + + log_subsection "VRF device, PBR table" + + run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::4 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected" + run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::5 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive" + run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::6 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped" + + # multipath version + # + log_subsection "default VRF - main table - multipath" + + run_ip6_mpath 254 ${TEST_NET6[1]}::4 "onlink" \ + "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]}" \ + "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]}" \ + 0 "unicast connected - multipath onlink" + + run_ip6_mpath 254 ${TEST_NET6[1]}::5 "onlink" \ + "${RECGW6[1]} dev ${NETIFS[p1]}" \ + "${RECGW6[2]} dev ${NETIFS[p3]}" \ + 0 "unicast recursive - multipath onlink" + + run_ip6_mpath 254 ${TEST_NET6[1]}::6 "onlink" \ + "::ffff:${TEST_NET4IN6[1]} dev ${NETIFS[p1]}" \ + "::ffff:${TEST_NET4IN6[2]} dev ${NETIFS[p3]}" \ + 0 "v4-mapped - multipath onlink" + + run_ip6_mpath 254 ${TEST_NET6[1]}::7 "" \ + "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]} onlink" \ + "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]} onlink" \ + 0 "unicast connected - multipath onlink both nexthops" + + run_ip6_mpath 254 ${TEST_NET6[1]}::8 "" \ + "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]} onlink" \ + "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]}" \ + 0 "unicast connected - multipath onlink first only" + + run_ip6_mpath 254 ${TEST_NET6[1]}::9 "" \ + "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]}" \ + "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]} onlink" \ + 0 "unicast connected - multipath onlink second only" +} + +invalid_onlink_ipv6() +{ + local lladdr + + lladdr=$(get_linklocal ${NETIFS[p1]}) || return 1 + + run_ip6 254 ${TEST_NET6[1]}::11 ${V6ADDRS[p1]} ${NETIFS[p1]} 2 \ + "Invalid gw - local unicast address" + run_ip6 254 ${TEST_NET6[1]}::12 ${lladdr} ${NETIFS[p1]} 2 \ + "Invalid gw - local linklocal address" + run_ip6 254 ${TEST_NET6[1]}::12 ${MCAST6} ${NETIFS[p1]} 2 \ + "Invalid gw - multicast address" + + lladdr=$(get_linklocal ${NETIFS[p5]}) || return 1 + run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::11 ${V6ADDRS[p5]} ${NETIFS[p5]} 2 \ + "Invalid gw - local unicast address, VRF" + run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${lladdr} ${NETIFS[p5]} 2 \ + "Invalid gw - local linklocal address, VRF" + run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${MCAST6} ${NETIFS[p5]} 2 \ + "Invalid gw - multicast address, VRF" + + run_ip6 254 ${TEST_NET6[1]}::101 ${V6ADDRS[p1]} "" 2 \ + "No nexthop device given" + + # default VRF validation is done against LOCAL table + # run_ip6 254 ${TEST_NET6[1]}::102 ${V6ADDRS[p3]/::[0-9]/::64} ${NETIFS[p1]} 2 \ + # "Gateway resolves to wrong nexthop device" + + run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::103 ${V6ADDRS[p7]/::[0-9]/::64} ${NETIFS[p5]} 2 \ + "Gateway resolves to wrong nexthop device - VRF" +} + +run_onlink_tests() +{ + log_section "IPv4 onlink" + log_subsection "Valid onlink commands" + valid_onlink_ipv4 + log_subsection "Invalid onlink commands" + invalid_onlink_ipv4 + + log_section "IPv6 onlink" + log_subsection "Valid onlink commands" + valid_onlink_ipv6 + log_subsection "Invalid onlink commands" + invalid_onlink_ipv6 +} + +################################################################################ +# main + +nsuccess=0 +nfail=0 + +cleanup +setup +run_onlink_tests +cleanup + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh new file mode 100755 index 000000000000..9164e60d4b66 --- /dev/null +++ b/tools/testing/selftests/net/fib_tests.sh @@ -0,0 +1,601 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking IPv4 and IPv6 FIB behavior in response to +# different events. + +ret=0 + +VERBOSE=${VERBOSE:=0} +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} +IP="ip -netns testns" + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf " TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + printf " TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +setup() +{ + set -e + ip netns add testns + $IP link set dev lo up + + $IP link add dummy0 type dummy + $IP link set dev dummy0 up + $IP address add 198.51.100.1/24 dev dummy0 + $IP -6 address add 2001:db8:1::1/64 dev dummy0 + set +e + +} + +cleanup() +{ + $IP link del dev dummy0 &> /dev/null + ip netns del testns +} + +get_linklocal() +{ + local dev=$1 + local addr + + addr=$($IP -6 -br addr show dev ${dev} | \ + awk '{ + for (i = 3; i <= NF; ++i) { + if ($i ~ /^fe80/) + print $i + } + }' + ) + addr=${addr/\/*} + + [ -z "$addr" ] && return 1 + + echo $addr + + return 0 +} + +fib_unreg_unicast_test() +{ + echo + echo "Single path route test" + + setup + + echo " Start point" + $IP route get fibmatch 198.51.100.2 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null + log_test $? 0 "IPv6 fibmatch" + + set -e + $IP link del dev dummy0 + set +e + + echo " Nexthop device deleted" + $IP route get fibmatch 198.51.100.2 &> /dev/null + log_test $? 2 "IPv4 fibmatch - no route" + $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null + log_test $? 2 "IPv6 fibmatch - no route" + + cleanup +} + +fib_unreg_multipath_test() +{ + + echo + echo "Multipath route test" + + setup + + set -e + $IP link add dummy1 type dummy + $IP link set dev dummy1 up + $IP address add 192.0.2.1/24 dev dummy1 + $IP -6 address add 2001:db8:2::1/64 dev dummy1 + + $IP route add 203.0.113.0/24 \ + nexthop via 198.51.100.2 dev dummy0 \ + nexthop via 192.0.2.2 dev dummy1 + $IP -6 route add 2001:db8:3::/64 \ + nexthop via 2001:db8:1::2 dev dummy0 \ + nexthop via 2001:db8:2::2 dev dummy1 + set +e + + echo " Start point" + $IP route get fibmatch 203.0.113.1 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null + log_test $? 0 "IPv6 fibmatch" + + set -e + $IP link del dev dummy0 + set +e + + echo " One nexthop device deleted" + $IP route get fibmatch 203.0.113.1 &> /dev/null + log_test $? 2 "IPv4 - multipath route removed on delete" + + $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null + # In IPv6 we do not flush the entire multipath route. + log_test $? 0 "IPv6 - multipath down to single path" + + set -e + $IP link del dev dummy1 + set +e + + echo " Second nexthop device deleted" + $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null + log_test $? 2 "IPv6 - no route" + + cleanup +} + +fib_unreg_test() +{ + fib_unreg_unicast_test + fib_unreg_multipath_test +} + +fib_down_unicast_test() +{ + echo + echo "Single path, admin down" + + setup + + echo " Start point" + $IP route get fibmatch 198.51.100.2 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null + log_test $? 0 "IPv6 fibmatch" + + set -e + $IP link set dev dummy0 down + set +e + + echo " Route deleted on down" + $IP route get fibmatch 198.51.100.2 &> /dev/null + log_test $? 2 "IPv4 fibmatch" + $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null + log_test $? 2 "IPv6 fibmatch" + + cleanup +} + +fib_down_multipath_test_do() +{ + local down_dev=$1 + local up_dev=$2 + + $IP route get fibmatch 203.0.113.1 \ + oif $down_dev &> /dev/null + log_test $? 2 "IPv4 fibmatch on down device" + $IP -6 route get fibmatch 2001:db8:3::1 \ + oif $down_dev &> /dev/null + log_test $? 2 "IPv6 fibmatch on down device" + + $IP route get fibmatch 203.0.113.1 \ + oif $up_dev &> /dev/null + log_test $? 0 "IPv4 fibmatch on up device" + $IP -6 route get fibmatch 2001:db8:3::1 \ + oif $up_dev &> /dev/null + log_test $? 0 "IPv6 fibmatch on up device" + + $IP route get fibmatch 203.0.113.1 | \ + grep $down_dev | grep -q "dead linkdown" + log_test $? 0 "IPv4 flags on down device" + $IP -6 route get fibmatch 2001:db8:3::1 | \ + grep $down_dev | grep -q "dead linkdown" + log_test $? 0 "IPv6 flags on down device" + + $IP route get fibmatch 203.0.113.1 | \ + grep $up_dev | grep -q "dead linkdown" + log_test $? 1 "IPv4 flags on up device" + $IP -6 route get fibmatch 2001:db8:3::1 | \ + grep $up_dev | grep -q "dead linkdown" + log_test $? 1 "IPv6 flags on up device" +} + +fib_down_multipath_test() +{ + echo + echo "Admin down multipath" + + setup + + set -e + $IP link add dummy1 type dummy + $IP link set dev dummy1 up + + $IP address add 192.0.2.1/24 dev dummy1 + $IP -6 address add 2001:db8:2::1/64 dev dummy1 + + $IP route add 203.0.113.0/24 \ + nexthop via 198.51.100.2 dev dummy0 \ + nexthop via 192.0.2.2 dev dummy1 + $IP -6 route add 2001:db8:3::/64 \ + nexthop via 2001:db8:1::2 dev dummy0 \ + nexthop via 2001:db8:2::2 dev dummy1 + set +e + + echo " Verify start point" + $IP route get fibmatch 203.0.113.1 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + + $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null + log_test $? 0 "IPv6 fibmatch" + + set -e + $IP link set dev dummy0 down + set +e + + echo " One device down, one up" + fib_down_multipath_test_do "dummy0" "dummy1" + + set -e + $IP link set dev dummy0 up + $IP link set dev dummy1 down + set +e + + echo " Other device down and up" + fib_down_multipath_test_do "dummy1" "dummy0" + + set -e + $IP link set dev dummy0 down + set +e + + echo " Both devices down" + $IP route get fibmatch 203.0.113.1 &> /dev/null + log_test $? 2 "IPv4 fibmatch" + $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null + log_test $? 2 "IPv6 fibmatch" + + $IP link del dev dummy1 + cleanup +} + +fib_down_test() +{ + fib_down_unicast_test + fib_down_multipath_test +} + +# Local routes should not be affected when carrier changes. +fib_carrier_local_test() +{ + echo + echo "Local carrier tests - single path" + + setup + + set -e + $IP link set dev dummy0 carrier on + set +e + + echo " Start point" + $IP route get fibmatch 198.51.100.1 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + $IP -6 route get fibmatch 2001:db8:1::1 &> /dev/null + log_test $? 0 "IPv6 fibmatch" + + $IP route get fibmatch 198.51.100.1 | \ + grep -q "linkdown" + log_test $? 1 "IPv4 - no linkdown flag" + $IP -6 route get fibmatch 2001:db8:1::1 | \ + grep -q "linkdown" + log_test $? 1 "IPv6 - no linkdown flag" + + set -e + $IP link set dev dummy0 carrier off + sleep 1 + set +e + + echo " Carrier off on nexthop" + $IP route get fibmatch 198.51.100.1 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + $IP -6 route get fibmatch 2001:db8:1::1 &> /dev/null + log_test $? 0 "IPv6 fibmatch" + + $IP route get fibmatch 198.51.100.1 | \ + grep -q "linkdown" + log_test $? 1 "IPv4 - linkdown flag set" + $IP -6 route get fibmatch 2001:db8:1::1 | \ + grep -q "linkdown" + log_test $? 1 "IPv6 - linkdown flag set" + + set -e + $IP address add 192.0.2.1/24 dev dummy0 + $IP -6 address add 2001:db8:2::1/64 dev dummy0 + set +e + + echo " Route to local address with carrier down" + $IP route get fibmatch 192.0.2.1 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + $IP -6 route get fibmatch 2001:db8:2::1 &> /dev/null + log_test $? 0 "IPv6 fibmatch" + + $IP route get fibmatch 192.0.2.1 | \ + grep -q "linkdown" + log_test $? 1 "IPv4 linkdown flag set" + $IP -6 route get fibmatch 2001:db8:2::1 | \ + grep -q "linkdown" + log_test $? 1 "IPv6 linkdown flag set" + + cleanup +} + +fib_carrier_unicast_test() +{ + ret=0 + + echo + echo "Single path route carrier test" + + setup + + set -e + $IP link set dev dummy0 carrier on + set +e + + echo " Start point" + $IP route get fibmatch 198.51.100.2 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null + log_test $? 0 "IPv6 fibmatch" + + $IP route get fibmatch 198.51.100.2 | \ + grep -q "linkdown" + log_test $? 1 "IPv4 no linkdown flag" + $IP -6 route get fibmatch 2001:db8:1::2 | \ + grep -q "linkdown" + log_test $? 1 "IPv6 no linkdown flag" + + set -e + $IP link set dev dummy0 carrier off + set +e + + echo " Carrier down" + $IP route get fibmatch 198.51.100.2 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null + log_test $? 0 "IPv6 fibmatch" + + $IP route get fibmatch 198.51.100.2 | \ + grep -q "linkdown" + log_test $? 0 "IPv4 linkdown flag set" + $IP -6 route get fibmatch 2001:db8:1::2 | \ + grep -q "linkdown" + log_test $? 0 "IPv6 linkdown flag set" + + set -e + $IP address add 192.0.2.1/24 dev dummy0 + $IP -6 address add 2001:db8:2::1/64 dev dummy0 + set +e + + echo " Second address added with carrier down" + $IP route get fibmatch 192.0.2.2 &> /dev/null + log_test $? 0 "IPv4 fibmatch" + $IP -6 route get fibmatch 2001:db8:2::2 &> /dev/null + log_test $? 0 "IPv6 fibmatch" + + $IP route get fibmatch 192.0.2.2 | \ + grep -q "linkdown" + log_test $? 0 "IPv4 linkdown flag set" + $IP -6 route get fibmatch 2001:db8:2::2 | \ + grep -q "linkdown" + log_test $? 0 "IPv6 linkdown flag set" + + cleanup +} + +fib_carrier_test() +{ + fib_carrier_local_test + fib_carrier_unicast_test +} + +################################################################################ +# Tests on nexthop spec + +# run 'ip route add' with given spec +add_rt() +{ + local desc="$1" + local erc=$2 + local vrf=$3 + local pfx=$4 + local gw=$5 + local dev=$6 + local cmd out rc + + [ "$vrf" = "-" ] && vrf="default" + [ -n "$gw" ] && gw="via $gw" + [ -n "$dev" ] && dev="dev $dev" + + cmd="$IP route add vrf $vrf $pfx $gw $dev" + if [ "$VERBOSE" = "1" ]; then + printf "\n COMMAND: $cmd\n" + fi + + out=$(eval $cmd 2>&1) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + log_test $rc $erc "$desc" +} + +fib4_nexthop() +{ + echo + echo "IPv4 nexthop tests" + + echo "<<< write me >>>" +} + +fib6_nexthop() +{ + local lldummy=$(get_linklocal dummy0) + local llv1=$(get_linklocal dummy0) + + if [ -z "$lldummy" ]; then + echo "Failed to get linklocal address for dummy0" + return 1 + fi + if [ -z "$llv1" ]; then + echo "Failed to get linklocal address for veth1" + return 1 + fi + + echo + echo "IPv6 nexthop tests" + + add_rt "Directly connected nexthop, unicast address" 0 \ + - 2001:db8:101::/64 2001:db8:1::2 + add_rt "Directly connected nexthop, unicast address with device" 0 \ + - 2001:db8:102::/64 2001:db8:1::2 "dummy0" + add_rt "Gateway is linklocal address" 0 \ + - 2001:db8:103::1/64 $llv1 "veth0" + + # fails because LL address requires a device + add_rt "Gateway is linklocal address, no device" 2 \ + - 2001:db8:104::1/64 $llv1 + + # local address can not be a gateway + add_rt "Gateway can not be local unicast address" 2 \ + - 2001:db8:105::/64 2001:db8:1::1 + add_rt "Gateway can not be local unicast address, with device" 2 \ + - 2001:db8:106::/64 2001:db8:1::1 "dummy0" + add_rt "Gateway can not be a local linklocal address" 2 \ + - 2001:db8:107::1/64 $lldummy "dummy0" + + # VRF tests + add_rt "Gateway can be local address in a VRF" 0 \ + - 2001:db8:108::/64 2001:db8:51::2 + add_rt "Gateway can be local address in a VRF, with device" 0 \ + - 2001:db8:109::/64 2001:db8:51::2 "veth0" + add_rt "Gateway can be local linklocal address in a VRF" 0 \ + - 2001:db8:110::1/64 $llv1 "veth0" + + add_rt "Redirect to VRF lookup" 0 \ + - 2001:db8:111::/64 "" "red" + + add_rt "VRF route, gateway can be local address in default VRF" 0 \ + red 2001:db8:112::/64 2001:db8:51::1 + + # local address in same VRF fails + add_rt "VRF route, gateway can not be a local address" 2 \ + red 2001:db8:113::1/64 2001:db8:2::1 + add_rt "VRF route, gateway can not be a local addr with device" 2 \ + red 2001:db8:114::1/64 2001:db8:2::1 "dummy1" +} + +# Default VRF: +# dummy0 - 198.51.100.1/24 2001:db8:1::1/64 +# veth0 - 192.0.2.1/24 2001:db8:51::1/64 +# +# VRF red: +# dummy1 - 192.168.2.1/24 2001:db8:2::1/64 +# veth1 - 192.0.2.2/24 2001:db8:51::2/64 +# +# [ dummy0 veth0 ]--[ veth1 dummy1 ] + +fib_nexthop_test() +{ + setup + + set -e + + $IP -4 rule add pref 32765 table local + $IP -4 rule del pref 0 + $IP -6 rule add pref 32765 table local + $IP -6 rule del pref 0 + + $IP link add red type vrf table 1 + $IP link set red up + $IP -4 route add vrf red unreachable default metric 4278198272 + $IP -6 route add vrf red unreachable default metric 4278198272 + + $IP link add veth0 type veth peer name veth1 + $IP link set dev veth0 up + $IP address add 192.0.2.1/24 dev veth0 + $IP -6 address add 2001:db8:51::1/64 dev veth0 + + $IP link set dev veth1 vrf red up + $IP address add 192.0.2.2/24 dev veth1 + $IP -6 address add 2001:db8:51::2/64 dev veth1 + + $IP link add dummy1 type dummy + $IP link set dev dummy1 vrf red up + $IP address add 192.168.2.1/24 dev dummy1 + $IP -6 address add 2001:db8:2::1/64 dev dummy1 + set +e + + sleep 1 + fib4_nexthop + fib6_nexthop + + ( + $IP link del dev dummy1 + $IP link del veth0 + $IP link del red + ) 2>/dev/null + cleanup +} + +################################################################################ +# + +fib_test() +{ + if [ -n "$TEST" ]; then + eval $TEST + else + fib_unreg_test + fib_down_test + fib_carrier_test + fib_nexthop_test + fi +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 0 +fi + +ip route help 2>&1 | grep -q fibmatch +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 too old, missing fibmatch" + exit 0 +fi + +# start clean +cleanup &> /dev/null + +fib_test + +exit $ret diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore new file mode 100644 index 000000000000..a793eef5b876 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/.gitignore @@ -0,0 +1 @@ +forwarding.config diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README new file mode 100644 index 000000000000..4a0964c42860 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/README @@ -0,0 +1,56 @@ +Motivation +========== + +One of the nice things about network namespaces is that they allow one +to easily create and test complex environments. + +Unfortunately, these namespaces can not be used with actual switching +ASICs, as their ports can not be migrated to other network namespaces +(NETIF_F_NETNS_LOCAL) and most of them probably do not support the +L1-separation provided by namespaces. + +However, a similar kind of flexibility can be achieved by using VRFs and +by looping the switch ports together. For example: + + br0 + + + vrf-h1 | vrf-h2 + + +---+----+ + + | | | | + 192.0.2.1/24 + + + + 192.0.2.2/24 + swp1 swp2 swp3 swp4 + + + + + + | | | | + +--------+ +--------+ + +The VRFs act as lightweight namespaces representing hosts connected to +the switch. + +This approach for testing switch ASICs has several advantages over the +traditional method that requires multiple physical machines, to name a +few: + +1. Only the device under test (DUT) is being tested without noise from +other system. + +2. Ability to easily provision complex topologies. Testing bridging +between 4-ports LAGs or 8-way ECMP requires many physical links that are +not always available. With the VRF-based approach one merely needs to +loopback more ports. + +These tests are written with switch ASICs in mind, but they can be run +on any Linux box using veth pairs to emulate physical loopbacks. + +Guidelines for Writing Tests +============================ + +o Where possible, reuse an existing topology for different tests instead + of recreating the same topology. +o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and + RFC 5737, respectively. +o Where possible, tests shall be written so that they can be reused by + multiple topologies and added to lib.sh. +o Checks shall be added to lib.sh for any external dependencies. +o Code shall be checked using ShellCheck [1] prior to submission. + +1. https://www.shellcheck.net/ diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh new file mode 100755 index 000000000000..75d922438bc9 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -0,0 +1,88 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=4 +CHECK_TC="yes" +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +switch_create() +{ + # 10 Seconds ageing time. + ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \ + mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +ping_test $h1 192.0.2.2 +ping6_test $h1 2001:db8:1::2 +learning_test "br0" $swp1 $h1 $h2 +flood_test $swp2 $h1 $h2 + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh new file mode 100755 index 000000000000..1cddf06f691d --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh @@ -0,0 +1,86 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +switch_create() +{ + # 10 Seconds ageing time. + ip link add dev br0 type bridge ageing_time 1000 mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +ping_test $h1 192.0.2.2 +ping6_test $h1 2001:db8:1::2 +learning_test "br0" $swp1 $h1 $h2 +flood_test $swp2 $h1 $h2 + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config new file mode 100644 index 000000000000..5cd2aed97958 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/config @@ -0,0 +1,12 @@ +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_BRIDGE_VLAN_FILTERING=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NET_VRF=m +CONFIG_BPF_SYSCALL=y +CONFIG_CGROUP_BPF=y +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_ACT_GACT=m +CONFIG_VETH=m diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample new file mode 100644 index 000000000000..e819d049d9ce --- /dev/null +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -0,0 +1,35 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +############################################################################## +# Topology description. p1 looped back to p2, p3 to p4 and so on. +declare -A NETIFS + +NETIFS[p1]=veth0 +NETIFS[p2]=veth1 +NETIFS[p3]=veth2 +NETIFS[p4]=veth3 +NETIFS[p5]=veth4 +NETIFS[p6]=veth5 +NETIFS[p7]=veth6 +NETIFS[p8]=veth7 + +############################################################################## +# Defines + +# IPv4 ping utility name +PING=ping +# IPv6 ping utility name. Some distributions use 'ping' for IPv6. +PING6=ping6 +# Packet generator. Some distributions use 'mz'. +MZ=mausezahn +# Time to wait after interfaces participating in the test are all UP +WAIT_TIME=5 +# Whether to pause on failure or not. +PAUSE_ON_FAIL=no +# Whether to pause on cleanup or not. +PAUSE_ON_CLEANUP=no +# Type of network interface to create +NETIF_TYPE=veth +# Whether to create virtual interfaces (veth) or not +NETIF_CREATE=yes diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh new file mode 100644 index 000000000000..1ac6c62271f3 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -0,0 +1,577 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +############################################################################## +# Defines + +# Can be overridden by the configuration file. +PING=${PING:=ping} +PING6=${PING6:=ping6} +MZ=${MZ:=mausezahn} +WAIT_TIME=${WAIT_TIME:=5} +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} +PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no} +NETIF_TYPE=${NETIF_TYPE:=veth} +NETIF_CREATE=${NETIF_CREATE:=yes} + +if [[ -f forwarding.config ]]; then + source forwarding.config +fi + +############################################################################## +# Sanity checks + +check_tc_version() +{ + tc -j &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc is missing JSON support" + exit 1 + fi + + tc filter help 2>&1 | grep block &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc is missing shared block support" + exit 1 + fi +} + +if [[ "$(id -u)" -ne 0 ]]; then + echo "SKIP: need root privileges" + exit 0 +fi + +if [[ "$CHECK_TC" = "yes" ]]; then + check_tc_version +fi + +if [[ ! -x "$(command -v jq)" ]]; then + echo "SKIP: jq not installed" + exit 1 +fi + +if [[ ! -x "$(command -v $MZ)" ]]; then + echo "SKIP: $MZ not installed" + exit 1 +fi + +if [[ ! -v NUM_NETIFS ]]; then + echo "SKIP: importer does not define \"NUM_NETIFS\"" + exit 1 +fi + +############################################################################## +# Command line options handling + +count=0 + +while [[ $# -gt 0 ]]; do + if [[ "$count" -eq "0" ]]; then + unset NETIFS + declare -A NETIFS + fi + count=$((count + 1)) + NETIFS[p$count]="$1" + shift +done + +############################################################################## +# Network interfaces configuration + +create_netif_veth() +{ + local i + + for i in $(eval echo {1..$NUM_NETIFS}); do + local j=$((i+1)) + + ip link show dev ${NETIFS[p$i]} &> /dev/null + if [[ $? -ne 0 ]]; then + ip link add ${NETIFS[p$i]} type veth \ + peer name ${NETIFS[p$j]} + if [[ $? -ne 0 ]]; then + echo "Failed to create netif" + exit 1 + fi + fi + i=$j + done +} + +create_netif() +{ + case "$NETIF_TYPE" in + veth) create_netif_veth + ;; + *) echo "Can not create interfaces of type \'$NETIF_TYPE\'" + exit 1 + ;; + esac +} + +if [[ "$NETIF_CREATE" = "yes" ]]; then + create_netif +fi + +for i in $(eval echo {1..$NUM_NETIFS}); do + ip link show dev ${NETIFS[p$i]} &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: could not find all required interfaces" + exit 1 + fi +done + +############################################################################## +# Helpers + +# Exit status to return at the end. Set in case one of the tests fails. +EXIT_STATUS=0 +# Per-test return value. Clear at the beginning of each test. +RET=0 + +check_err() +{ + local err=$1 + local msg=$2 + + if [[ $RET -eq 0 && $err -ne 0 ]]; then + RET=$err + retmsg=$msg + fi +} + +check_fail() +{ + local err=$1 + local msg=$2 + + if [[ $RET -eq 0 && $err -eq 0 ]]; then + RET=1 + retmsg=$msg + fi +} + +log_test() +{ + local test_name=$1 + local opt_str=$2 + + if [[ $# -eq 2 ]]; then + opt_str="($opt_str)" + fi + + if [[ $RET -ne 0 ]]; then + EXIT_STATUS=1 + printf "TEST: %-60s [FAIL]\n" "$test_name $opt_str" + if [[ ! -z "$retmsg" ]]; then + printf "\t%s\n" "$retmsg" + fi + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo "Hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + return 1 + fi + + printf "TEST: %-60s [PASS]\n" "$test_name $opt_str" + return 0 +} + +log_info() +{ + local msg=$1 + + echo "INFO: $msg" +} + +setup_wait() +{ + for i in $(eval echo {1..$NUM_NETIFS}); do + while true; do + ip link show dev ${NETIFS[p$i]} up \ + | grep 'state UP' &> /dev/null + if [[ $? -ne 0 ]]; then + sleep 1 + else + break + fi + done + done + + # Make sure links are ready. + sleep $WAIT_TIME +} + +pre_cleanup() +{ + if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then + echo "Pausing before cleanup, hit any key to continue" + read + fi +} + +vrf_prepare() +{ + ip -4 rule add pref 32765 table local + ip -4 rule del pref 0 + ip -6 rule add pref 32765 table local + ip -6 rule del pref 0 +} + +vrf_cleanup() +{ + ip -6 rule add pref 0 table local + ip -6 rule del pref 32765 + ip -4 rule add pref 0 table local + ip -4 rule del pref 32765 +} + +__last_tb_id=0 +declare -A __TB_IDS + +__vrf_td_id_assign() +{ + local vrf_name=$1 + + __last_tb_id=$((__last_tb_id + 1)) + __TB_IDS[$vrf_name]=$__last_tb_id + return $__last_tb_id +} + +__vrf_td_id_lookup() +{ + local vrf_name=$1 + + return ${__TB_IDS[$vrf_name]} +} + +vrf_create() +{ + local vrf_name=$1 + local tb_id + + __vrf_td_id_assign $vrf_name + tb_id=$? + + ip link add dev $vrf_name type vrf table $tb_id + ip -4 route add table $tb_id unreachable default metric 4278198272 + ip -6 route add table $tb_id unreachable default metric 4278198272 +} + +vrf_destroy() +{ + local vrf_name=$1 + local tb_id + + __vrf_td_id_lookup $vrf_name + tb_id=$? + + ip -6 route del table $tb_id unreachable default metric 4278198272 + ip -4 route del table $tb_id unreachable default metric 4278198272 + ip link del dev $vrf_name +} + +__addr_add_del() +{ + local if_name=$1 + local add_del=$2 + local array + + shift + shift + array=("${@}") + + for addrstr in "${array[@]}"; do + ip address $add_del $addrstr dev $if_name + done +} + +simple_if_init() +{ + local if_name=$1 + local vrf_name + local array + + shift + vrf_name=v$if_name + array=("${@}") + + vrf_create $vrf_name + ip link set dev $if_name master $vrf_name + ip link set dev $vrf_name up + ip link set dev $if_name up + + __addr_add_del $if_name add "${array[@]}" +} + +simple_if_fini() +{ + local if_name=$1 + local vrf_name + local array + + shift + vrf_name=v$if_name + array=("${@}") + + __addr_add_del $if_name del "${array[@]}" + + ip link set dev $if_name down + vrf_destroy $vrf_name +} + +master_name_get() +{ + local if_name=$1 + + ip -j link show dev $if_name | jq -r '.[]["master"]' +} + +link_stats_tx_packets_get() +{ + local if_name=$1 + + ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]' +} + +mac_get() +{ + local if_name=$1 + + ip -j link show dev $if_name | jq -r '.[]["address"]' +} + +bridge_ageing_time_get() +{ + local bridge=$1 + local ageing_time + + # Need to divide by 100 to convert to seconds. + ageing_time=$(ip -j -d link show dev $bridge \ + | jq '.[]["linkinfo"]["info_data"]["ageing_time"]') + echo $((ageing_time / 100)) +} + +forwarding_enable() +{ + ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding) + ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding) + + sysctl -q -w net.ipv4.conf.all.forwarding=1 + sysctl -q -w net.ipv6.conf.all.forwarding=1 +} + +forwarding_restore() +{ + sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd + sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd +} + +tc_offload_check() +{ + for i in $(eval echo {1..$NUM_NETIFS}); do + ethtool -k ${NETIFS[p$i]} \ + | grep "hw-tc-offload: on" &> /dev/null + if [[ $? -ne 0 ]]; then + return 1 + fi + done + + return 0 +} + +############################################################################## +# Tests + +ping_test() +{ + local if_name=$1 + local dip=$2 + local vrf_name + + RET=0 + + vrf_name=$(master_name_get $if_name) + ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null + check_err $? + log_test "ping" +} + +ping6_test() +{ + local if_name=$1 + local dip=$2 + local vrf_name + + RET=0 + + vrf_name=$(master_name_get $if_name) + ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null + check_err $? + log_test "ping6" +} + +learning_test() +{ + local bridge=$1 + local br_port1=$2 # Connected to `host1_if`. + local host1_if=$3 + local host2_if=$4 + local mac=de:ad:be:ef:13:37 + local ageing_time + + RET=0 + + bridge -j fdb show br $bridge brport $br_port1 \ + | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null + check_fail $? "Found FDB record when should not" + + # Disable unknown unicast flooding on `br_port1` to make sure + # packets are only forwarded through the port after a matching + # FDB entry was installed. + bridge link set dev $br_port1 flood off + + tc qdisc add dev $host1_if ingress + tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \ + flower dst_mac $mac action drop + + $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q + sleep 1 + + tc -j -s filter show dev $host1_if ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_fail $? "Packet reached second host when should not" + + $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q + sleep 1 + + bridge -j fdb show br $bridge brport $br_port1 \ + | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null + check_err $? "Did not find FDB record when should" + + $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q + sleep 1 + + tc -j -s filter show dev $host1_if ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet did not reach second host when should" + + # Wait for 10 seconds after the ageing time to make sure FDB + # record was aged-out. + ageing_time=$(bridge_ageing_time_get $bridge) + sleep $((ageing_time + 10)) + + bridge -j fdb show br $bridge brport $br_port1 \ + | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null + check_fail $? "Found FDB record when should not" + + bridge link set dev $br_port1 learning off + + $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q + sleep 1 + + bridge -j fdb show br $bridge brport $br_port1 \ + | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null + check_fail $? "Found FDB record when should not" + + bridge link set dev $br_port1 learning on + + tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower + tc qdisc del dev $host1_if ingress + + bridge link set dev $br_port1 flood on + + log_test "FDB learning" +} + +flood_test_do() +{ + local should_flood=$1 + local mac=$2 + local ip=$3 + local host1_if=$4 + local host2_if=$5 + local err=0 + + # Add an ACL on `host2_if` which will tell us whether the packet + # was flooded to it or not. + tc qdisc add dev $host2_if ingress + tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \ + flower dst_mac $mac action drop + + $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q + sleep 1 + + tc -j -s filter show dev $host2_if ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + if [[ $? -ne 0 && $should_flood == "true" || \ + $? -eq 0 && $should_flood == "false" ]]; then + err=1 + fi + + tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower + tc qdisc del dev $host2_if ingress + + return $err +} + +flood_unicast_test() +{ + local br_port=$1 + local host1_if=$2 + local host2_if=$3 + local mac=de:ad:be:ef:13:37 + local ip=192.0.2.100 + + RET=0 + + bridge link set dev $br_port flood off + + flood_test_do false $mac $ip $host1_if $host2_if + check_err $? "Packet flooded when should not" + + bridge link set dev $br_port flood on + + flood_test_do true $mac $ip $host1_if $host2_if + check_err $? "Packet was not flooded when should" + + log_test "Unknown unicast flood" +} + +flood_multicast_test() +{ + local br_port=$1 + local host1_if=$2 + local host2_if=$3 + local mac=01:00:5e:00:00:01 + local ip=239.0.0.1 + + RET=0 + + bridge link set dev $br_port mcast_flood off + + flood_test_do false $mac $ip $host1_if $host2_if + check_err $? "Packet flooded when should not" + + bridge link set dev $br_port mcast_flood on + + flood_test_do true $mac $ip $host1_if $host2_if + check_err $? "Packet was not flooded when should" + + log_test "Unregistered multicast flood" +} + +flood_test() +{ + # `br_port` is connected to `host2_if` + local br_port=$1 + local host1_if=$2 + local host2_if=$3 + + flood_unicast_test $br_port $host1_if $host2_if + flood_multicast_test $br_port $host1_if $host2_if +} diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh new file mode 100755 index 000000000000..cc6a14abfa87 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router.sh @@ -0,0 +1,125 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + vrf_create "vrf-h1" + ip link set dev $h1 master vrf-h1 + + ip link set dev vrf-h1 up + ip link set dev $h1 up + + ip address add 192.0.2.2/24 dev $h1 + ip address add 2001:db8:1::2/64 dev $h1 + + ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1 + ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-h1 + ip route del 198.51.100.0/24 vrf vrf-h1 + + ip address del 2001:db8:1::2/64 dev $h1 + ip address del 192.0.2.2/24 dev $h1 + + ip link set dev $h1 down + vrf_destroy "vrf-h1" +} + +h2_create() +{ + vrf_create "vrf-h2" + ip link set dev $h2 master vrf-h2 + + ip link set dev vrf-h2 up + ip link set dev $h2 up + + ip address add 198.51.100.2/24 dev $h2 + ip address add 2001:db8:2::2/64 dev $h2 + + ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1 + ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-h2 + ip route del 192.0.2.0/24 vrf vrf-h2 + + ip address del 2001:db8:2::2/64 dev $h2 + ip address del 198.51.100.2/24 dev $h2 + + ip link set dev $h2 down + vrf_destroy "vrf-h2" +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + ip address add 192.0.2.1/24 dev $rp1 + ip address add 2001:db8:1::1/64 dev $rp1 + + ip address add 198.51.100.1/24 dev $rp2 + ip address add 2001:db8:2::1/64 dev $rp2 +} + +router_destroy() +{ + ip address del 2001:db8:2::1/64 dev $rp2 + ip address del 198.51.100.1/24 dev $rp2 + + ip address del 2001:db8:1::1/64 dev $rp1 + ip address del 192.0.2.1/24 dev $rp1 + + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +ping_test $h1 198.51.100.2 +ping6_test $h1 2001:db8:2::2 + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh new file mode 100755 index 000000000000..3bc351008db6 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -0,0 +1,376 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=8 +source lib.sh + +h1_create() +{ + vrf_create "vrf-h1" + ip link set dev $h1 master vrf-h1 + + ip link set dev vrf-h1 up + ip link set dev $h1 up + + ip address add 192.0.2.2/24 dev $h1 + ip address add 2001:db8:1::2/64 dev $h1 + + ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1 + ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-h1 + ip route del 198.51.100.0/24 vrf vrf-h1 + + ip address del 2001:db8:1::2/64 dev $h1 + ip address del 192.0.2.2/24 dev $h1 + + ip link set dev $h1 down + vrf_destroy "vrf-h1" +} + +h2_create() +{ + vrf_create "vrf-h2" + ip link set dev $h2 master vrf-h2 + + ip link set dev vrf-h2 up + ip link set dev $h2 up + + ip address add 198.51.100.2/24 dev $h2 + ip address add 2001:db8:2::2/64 dev $h2 + + ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1 + ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-h2 + ip route del 192.0.2.0/24 vrf vrf-h2 + + ip address del 2001:db8:2::2/64 dev $h2 + ip address del 198.51.100.2/24 dev $h2 + + ip link set dev $h2 down + vrf_destroy "vrf-h2" +} + +router1_create() +{ + vrf_create "vrf-r1" + ip link set dev $rp11 master vrf-r1 + ip link set dev $rp12 master vrf-r1 + ip link set dev $rp13 master vrf-r1 + + ip link set dev vrf-r1 up + ip link set dev $rp11 up + ip link set dev $rp12 up + ip link set dev $rp13 up + + ip address add 192.0.2.1/24 dev $rp11 + ip address add 2001:db8:1::1/64 dev $rp11 + + ip address add 169.254.2.12/24 dev $rp12 + ip address add fe80:2::12/64 dev $rp12 + + ip address add 169.254.3.13/24 dev $rp13 + ip address add fe80:3::13/64 dev $rp13 + + ip route add 198.51.100.0/24 vrf vrf-r1 \ + nexthop via 169.254.2.22 dev $rp12 \ + nexthop via 169.254.3.23 dev $rp13 + ip route add 2001:db8:2::/64 vrf vrf-r1 \ + nexthop via fe80:2::22 dev $rp12 \ + nexthop via fe80:3::23 dev $rp13 +} + +router1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-r1 + ip route del 198.51.100.0/24 vrf vrf-r1 + + ip address del fe80:3::13/64 dev $rp13 + ip address del 169.254.3.13/24 dev $rp13 + + ip address del fe80:2::12/64 dev $rp12 + ip address del 169.254.2.12/24 dev $rp12 + + ip address del 2001:db8:1::1/64 dev $rp11 + ip address del 192.0.2.1/24 dev $rp11 + + ip link set dev $rp13 down + ip link set dev $rp12 down + ip link set dev $rp11 down + + vrf_destroy "vrf-r1" +} + +router2_create() +{ + vrf_create "vrf-r2" + ip link set dev $rp21 master vrf-r2 + ip link set dev $rp22 master vrf-r2 + ip link set dev $rp23 master vrf-r2 + + ip link set dev vrf-r2 up + ip link set dev $rp21 up + ip link set dev $rp22 up + ip link set dev $rp23 up + + ip address add 198.51.100.1/24 dev $rp21 + ip address add 2001:db8:2::1/64 dev $rp21 + + ip address add 169.254.2.22/24 dev $rp22 + ip address add fe80:2::22/64 dev $rp22 + + ip address add 169.254.3.23/24 dev $rp23 + ip address add fe80:3::23/64 dev $rp23 + + ip route add 192.0.2.0/24 vrf vrf-r2 \ + nexthop via 169.254.2.12 dev $rp22 \ + nexthop via 169.254.3.13 dev $rp23 + ip route add 2001:db8:1::/64 vrf vrf-r2 \ + nexthop via fe80:2::12 dev $rp22 \ + nexthop via fe80:3::13 dev $rp23 +} + +router2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-r2 + ip route del 192.0.2.0/24 vrf vrf-r2 + + ip address del fe80:3::23/64 dev $rp23 + ip address del 169.254.3.23/24 dev $rp23 + + ip address del fe80:2::22/64 dev $rp22 + ip address del 169.254.2.22/24 dev $rp22 + + ip address del 2001:db8:2::1/64 dev $rp21 + ip address del 198.51.100.1/24 dev $rp21 + + ip link set dev $rp23 down + ip link set dev $rp22 down + ip link set dev $rp21 down + + vrf_destroy "vrf-r2" +} + +multipath_eval() +{ + local desc="$1" + local weight_rp12=$2 + local weight_rp13=$3 + local packets_rp12=$4 + local packets_rp13=$5 + local weights_ratio packets_ratio diff + + RET=0 + + if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then + check_err 1 "Packet difference is 0" + log_test "Multipath" + log_info "Expected ratio $weights_ratio" + return + fi + + if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then + weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \ + | bc -l) + packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \ + | bc -l) + else + weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" | \ + bc -l) + packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" | \ + bc -l) + fi + + diff=$(echo $weights_ratio - $packets_ratio | bc -l) + diff=${diff#-} + + test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0 + check_err $? "Too large discrepancy between expected and measured ratios" + log_test "$desc" + log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio" +} + +multipath4_test() +{ + local desc="$1" + local weight_rp12=$2 + local weight_rp13=$3 + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 + local packets_rp12 packets_rp13 + local hash_policy + + # Transmit multiple flows from h1 to h2 and make sure they are + # distributed between both multipath links (rp12 and rp13) + # according to the configured weights. + hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy) + sysctl -q -w net.ipv4.fib_multipath_hash_policy=1 + ip route replace 198.51.100.0/24 vrf vrf-r1 \ + nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \ + nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13 + + t0_rp12=$(link_stats_tx_packets_get $rp12) + t0_rp13=$(link_stats_tx_packets_get $rp13) + + ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ + -d 1msec -t udp "sp=1024,dp=0-32768" + + t1_rp12=$(link_stats_tx_packets_get $rp12) + t1_rp13=$(link_stats_tx_packets_get $rp13) + + let "packets_rp12 = $t1_rp12 - $t0_rp12" + let "packets_rp13 = $t1_rp13 - $t0_rp13" + multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 + + # Restore settings. + ip route replace 198.51.100.0/24 vrf vrf-r1 \ + nexthop via 169.254.2.22 dev $rp12 \ + nexthop via 169.254.3.23 dev $rp13 + sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy +} + +multipath6_l4_test() +{ + local desc="$1" + local weight_rp12=$2 + local weight_rp13=$3 + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 + local packets_rp12 packets_rp13 + local hash_policy + + # Transmit multiple flows from h1 to h2 and make sure they are + # distributed between both multipath links (rp12 and rp13) + # according to the configured weights. + hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy) + sysctl -q -w net.ipv6.fib_multipath_hash_policy=1 + + ip route replace 2001:db8:2::/64 vrf vrf-r1 \ + nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \ + nexthop via fe80:3::23 dev $rp13 weight $weight_rp13 + + t0_rp12=$(link_stats_tx_packets_get $rp12) + t0_rp13=$(link_stats_tx_packets_get $rp13) + + $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ + -d 1msec -t udp "sp=1024,dp=0-32768" + + t1_rp12=$(link_stats_tx_packets_get $rp12) + t1_rp13=$(link_stats_tx_packets_get $rp13) + + let "packets_rp12 = $t1_rp12 - $t0_rp12" + let "packets_rp13 = $t1_rp13 - $t0_rp13" + multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 + + ip route replace 2001:db8:2::/64 vrf vrf-r1 \ + nexthop via fe80:2::22 dev $rp12 \ + nexthop via fe80:3::23 dev $rp13 + + sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy +} + +multipath6_test() +{ + local desc="$1" + local weight_rp12=$2 + local weight_rp13=$3 + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 + local packets_rp12 packets_rp13 + + ip route replace 2001:db8:2::/64 vrf vrf-r1 \ + nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \ + nexthop via fe80:3::23 dev $rp13 weight $weight_rp13 + + t0_rp12=$(link_stats_tx_packets_get $rp12) + t0_rp13=$(link_stats_tx_packets_get $rp13) + + # Generate 16384 echo requests, each with a random flow label. + for _ in $(seq 1 16384); do + ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null + done + + t1_rp12=$(link_stats_tx_packets_get $rp12) + t1_rp13=$(link_stats_tx_packets_get $rp13) + + let "packets_rp12 = $t1_rp12 - $t0_rp12" + let "packets_rp13 = $t1_rp13 - $t0_rp13" + multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 + + ip route replace 2001:db8:2::/64 vrf vrf-r1 \ + nexthop via fe80:2::22 dev $rp12 \ + nexthop via fe80:3::23 dev $rp13 +} + +multipath_test() +{ + log_info "Running IPv4 multipath tests" + multipath4_test "ECMP" 1 1 + multipath4_test "Weighted MP 2:1" 2 1 + multipath4_test "Weighted MP 11:45" 11 45 + + log_info "Running IPv6 multipath tests" + multipath6_test "ECMP" 1 1 + multipath6_test "Weighted MP 2:1" 2 1 + multipath6_test "Weighted MP 11:45" 11 45 + + log_info "Running IPv6 L4 hash multipath tests" + multipath6_l4_test "ECMP" 1 1 + multipath6_l4_test "Weighted MP 2:1" 2 1 + multipath6_l4_test "Weighted MP 11:45" 11 45 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp11=${NETIFS[p2]} + + rp12=${NETIFS[p3]} + rp22=${NETIFS[p4]} + + rp13=${NETIFS[p5]} + rp23=${NETIFS[p6]} + + rp21=${NETIFS[p7]} + h2=${NETIFS[p8]} + + vrf_prepare + + h1_create + h2_create + + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router2_destroy + router1_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +ping_test $h1 198.51.100.2 +ping6_test $h1 2001:db8:2::2 +multipath_test + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh new file mode 100755 index 000000000000..3a6385ebd5d0 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -0,0 +1,202 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=4 +source tc_common.sh +source lib.sh + +tcflags="skip_hw" + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/24 +} + +switch_create() +{ + simple_if_init $swp1 192.0.2.2/24 + tc qdisc add dev $swp1 clsact + + simple_if_init $swp2 192.0.2.1/24 +} + +switch_destroy() +{ + simple_if_fini $swp2 192.0.2.1/24 + + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 192.0.2.2/24 +} + +mirred_egress_test() +{ + local action=$1 + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched without redirect rule inserted" + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action mirred egress $action \ + dev $swp2 + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match incoming $action packet" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + log_test "mirred egress $action ($tcflags)" +} + +gact_drop_and_ok_test() +{ + RET=0 + + tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $swp1 ingress" 102 1 + check_err $? "Packet was not dropped" + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action ok + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $swp1 ingress" 101 1 + check_err $? "Did not see passed packet" + + tc_check_packets "dev $swp1 ingress" 102 2 + check_fail $? "Packet was dropped and it should not reach here" + + tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + log_test "gact drop and ok ($tcflags)" +} + +gact_trap_test() +{ + RET=0 + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 192.0.2.2 action drop + tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.2.2 action mirred egress redirect \ + dev $swp2 + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $swp1 ingress" 101 1 + check_fail $? "Saw packet without trap rule inserted" + + tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action trap + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $swp1 ingress" 102 1 + check_err $? "Packet was not trapped" + + tc_check_packets "dev $swp1 ingress" 101 1 + check_err $? "Did not see trapped packet" + + tc filter del dev $swp1 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + log_test "trap ($tcflags)" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + swp1origmac=$(mac_get $swp1) + swp2origmac=$(mac_get $swp2) + ip link set $swp1 address $h2mac + ip link set $swp2 address $h1mac + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup + + ip link set $swp2 address $swp2origmac + ip link set $swp1 address $swp1origmac +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +gact_drop_and_ok_test +mirred_egress_test "redirect" +mirred_egress_test "mirror" + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_info "Could not test offloaded functionality" +else + tcflags="skip_sw" + gact_drop_and_ok_test + mirred_egress_test "redirect" + mirred_egress_test "mirror" + gact_trap_test +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh new file mode 100755 index 000000000000..2fd15226974b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_chains.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=2 +source tc_common.sh +source lib.sh + +tcflags="skip_hw" + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/24 +} + +unreachable_chain_test() +{ + RET=0 + + tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \ + flower $tcflags dst_mac $h2mac action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 1101 1 + check_fail $? "matched on filter in unreachable chain" + + tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \ + flower + + log_test "unreachable chain ($tcflags)" +} + +gact_goto_chain_test() +{ + RET=0 + + tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \ + flower $tcflags dst_mac $h2mac action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_mac $h2mac action drop + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_mac $h2mac action goto chain 1 + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_fail $? "Matched on a wrong filter" + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct filter with goto chain action" + + tc_check_packets "dev $h2 ingress" 1101 1 + check_err $? "Did not match on correct filter in chain 1" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \ + flower + + log_test "gact goto chain ($tcflags)" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +unreachable_chain_test +gact_goto_chain_test + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_info "Could not test offloaded functionality" +else + tcflags="skip_sw" + unreachable_chain_test + gact_goto_chain_test +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh new file mode 100644 index 000000000000..9d3b64a2a264 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_common.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +CHECK_TC="yes" + +tc_check_packets() +{ + local id=$1 + local handle=$2 + local count=$3 + local ret + + output="$(tc -j -s filter show $id)" + # workaround the jq bug which causes jq to return 0 in case input is "" + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + echo $output | \ + jq -e ".[] \ + | select(.options.handle == $handle) \ + | select(.options.actions[0].stats.packets == $count)" \ + &> /dev/null + return $? +} diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh new file mode 100755 index 000000000000..032b882adfc0 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -0,0 +1,196 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=2 +source tc_common.sh +source lib.sh + +tcflags="skip_hw" + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 198.51.100.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 198.51.100.2/24 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24 +} + +match_dst_mac_test() +{ + local dummy_mac=de:ad:be:ef:aa:aa + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_mac $dummy_mac action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_mac $h2mac action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "dst_mac match ($tcflags)" +} + +match_src_mac_test() +{ + local dummy_mac=de:ad:be:ef:aa:aa + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags src_mac $dummy_mac action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags src_mac $h1mac action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "src_mac match ($tcflags)" +} + +match_dst_ip_test() +{ + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 198.51.100.2 action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action drop + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.2.0/24 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Did not match on correct filter with mask" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + + log_test "dst_ip match ($tcflags)" +} + +match_src_ip_test() +{ + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags src_ip 198.51.100.1 action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags src_ip 192.0.2.1 action drop + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags src_ip 192.0.2.0/24 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Did not match on correct filter with mask" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + + log_test "src_ip match ($tcflags)" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +match_dst_mac_test +match_src_mac_test +match_dst_ip_test +match_src_ip_test + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_info "Could not test offloaded functionality" +else + tcflags="skip_sw" + match_dst_mac_test + match_src_mac_test + match_dst_ip_test + match_src_ip_test +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh new file mode 100755 index 000000000000..077b98048ef4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=4 +source tc_common.sh +source lib.sh + +tcflags="skip_hw" + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.1/24 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.1/24 +} + +switch_create() +{ + simple_if_init $swp1 192.0.2.2/24 + tc qdisc add dev $swp1 ingress_block 22 egress_block 23 clsact + + simple_if_init $swp2 192.0.2.2/24 + tc qdisc add dev $swp2 ingress_block 22 egress_block 23 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + simple_if_fini $swp2 192.0.2.2/24 + + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 192.0.2.2/24 +} + +shared_block_test() +{ + RET=0 + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "block 22" 101 1 + check_err $? "Did not match first incoming packet on a block" + + $MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "block 22" 101 2 + check_err $? "Did not match second incoming packet on a block" + + tc filter del block 22 protocol ip pref 1 handle 101 flower + + log_test "shared block ($tcflags)" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + swmac=$(mac_get $swp1) + swp2origmac=$(mac_get $swp2) + ip link set $swp2 address $swmac + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup + + ip link set $swp2 address $swp2origmac +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +shared_block_test + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_info "Could not test offloaded functionality" +else + tcflags="skip_sw" + shared_block_test +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/in_netns.sh b/tools/testing/selftests/net/in_netns.sh new file mode 100755 index 000000000000..88795b510b32 --- /dev/null +++ b/tools/testing/selftests/net/in_netns.sh @@ -0,0 +1,23 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Execute a subprocess in a network namespace + +set -e + +readonly NETNS="ns-$(mktemp -u XXXXXX)" + +setup() { + ip netns add "${NETNS}" + ip -netns "${NETNS}" link set lo up +} + +cleanup() { + ip netns del "${NETNS}" +} + +trap cleanup EXIT +setup + +ip netns exec "${NETNS}" "$@" +exit "$?" diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index 3ab6ec403905..406cc70c571d 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -14,6 +14,9 @@ * - SOCK_DGRAM * - SOCK_RAW * + * PF_RDS + * - SOCK_SEQPACKET + * * Start this program on two connected hosts, one in send mode and * the other with option '-r' to put it in receiver mode. * @@ -53,6 +56,7 @@ #include <sys/types.h> #include <sys/wait.h> #include <unistd.h> +#include <linux/rds.h> #ifndef SO_EE_ORIGIN_ZEROCOPY #define SO_EE_ORIGIN_ZEROCOPY 5 @@ -164,17 +168,39 @@ static int do_accept(int fd) return fd; } -static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy) +static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie) +{ + struct cmsghdr *cm; + + if (!msg->msg_control) + error(1, errno, "NULL cookie"); + cm = (void *)msg->msg_control; + cm->cmsg_len = CMSG_LEN(sizeof(cookie)); + cm->cmsg_level = SOL_RDS; + cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE; + memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie)); +} + +static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain) { int ret, len, i, flags; + static uint32_t cookie; + char ckbuf[CMSG_SPACE(sizeof(cookie))]; len = 0; for (i = 0; i < msg->msg_iovlen; i++) len += msg->msg_iov[i].iov_len; flags = MSG_DONTWAIT; - if (do_zerocopy) + if (do_zerocopy) { flags |= MSG_ZEROCOPY; + if (domain == PF_RDS) { + memset(&msg->msg_control, 0, sizeof(msg->msg_control)); + msg->msg_controllen = CMSG_SPACE(sizeof(cookie)); + msg->msg_control = (struct cmsghdr *)ckbuf; + add_zcopy_cookie(msg, ++cookie); + } + } ret = sendmsg(fd, msg, flags); if (ret == -1 && errno == EAGAIN) @@ -190,6 +216,10 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy) if (do_zerocopy && ret) expected_completions++; } + if (do_zerocopy && domain == PF_RDS) { + msg->msg_control = NULL; + msg->msg_controllen = 0; + } return true; } @@ -216,7 +246,9 @@ static void do_sendmsg_corked(int fd, struct msghdr *msg) msg->msg_iov[0].iov_len = payload_len + extra_len; extra_len = 0; - do_sendmsg(fd, msg, do_zerocopy); + do_sendmsg(fd, msg, do_zerocopy, + (cfg_dst_addr.ss_family == AF_INET ? + PF_INET : PF_INET6)); } do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0); @@ -259,22 +291,28 @@ static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len) return sizeof(*ip6h); } -static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr) + +static void setup_sockaddr(int domain, const char *str_addr, + struct sockaddr_storage *sockaddr) { struct sockaddr_in6 *addr6 = (void *) sockaddr; struct sockaddr_in *addr4 = (void *) sockaddr; switch (domain) { case PF_INET: + memset(addr4, 0, sizeof(*addr4)); addr4->sin_family = AF_INET; addr4->sin_port = htons(cfg_port); - if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + if (str_addr && + inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) error(1, 0, "ipv4 parse error: %s", str_addr); break; case PF_INET6: + memset(addr6, 0, sizeof(*addr6)); addr6->sin6_family = AF_INET6; addr6->sin6_port = htons(cfg_port); - if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + if (str_addr && + inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) error(1, 0, "ipv6 parse error: %s", str_addr); break; default: @@ -294,14 +332,65 @@ static int do_setup_tx(int domain, int type, int protocol) if (cfg_zerocopy) do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1); - if (domain != PF_PACKET) + if (domain != PF_PACKET && domain != PF_RDS) if (connect(fd, (void *) &cfg_dst_addr, cfg_alen)) error(1, errno, "connect"); + if (domain == PF_RDS) { + if (bind(fd, (void *) &cfg_src_addr, cfg_alen)) + error(1, errno, "bind"); + } + return fd; } -static bool do_recv_completion(int fd) +static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck) +{ + int i; + + if (ck->num > RDS_MAX_ZCOOKIES) + error(1, 0, "Returned %d cookies, max expected %d\n", + ck->num, RDS_MAX_ZCOOKIES); + for (i = 0; i < ck->num; i++) + if (cfg_verbose >= 2) + fprintf(stderr, "%d\n", ck->cookies[i]); + return ck->num; +} + +static bool do_recvmsg_completion(int fd) +{ + char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))]; + struct rds_zcopy_cookies *ck; + struct cmsghdr *cmsg; + struct msghdr msg; + bool ret = false; + + memset(&msg, 0, sizeof(msg)); + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + + if (recvmsg(fd, &msg, MSG_DONTWAIT)) + return ret; + + if (msg.msg_flags & MSG_CTRUNC) + error(1, errno, "recvmsg notification: truncated"); + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level == SOL_RDS && + cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) { + + ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg); + completions += do_process_zerocopy_cookies(ck); + ret = true; + break; + } + error(0, 0, "ignoring cmsg at level %d type %d\n", + cmsg->cmsg_level, cmsg->cmsg_type); + } + return ret; +} + +static bool do_recv_completion(int fd, int domain) { struct sock_extended_err *serr; struct msghdr msg = {}; @@ -310,6 +399,9 @@ static bool do_recv_completion(int fd) int ret, zerocopy; char control[100]; + if (domain == PF_RDS) + return do_recvmsg_completion(fd); + msg.msg_control = control; msg.msg_controllen = sizeof(control); @@ -331,6 +423,7 @@ static bool do_recv_completion(int fd) cm->cmsg_level, cm->cmsg_type); serr = (void *) CMSG_DATA(cm); + if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) error(1, 0, "serr: wrong origin: %u", serr->ee_origin); if (serr->ee_errno != 0) @@ -365,20 +458,20 @@ static bool do_recv_completion(int fd) } /* Read all outstanding messages on the errqueue */ -static void do_recv_completions(int fd) +static void do_recv_completions(int fd, int domain) { - while (do_recv_completion(fd)) {} + while (do_recv_completion(fd, domain)) {} } /* Wait for all remaining completions on the errqueue */ -static void do_recv_remaining_completions(int fd) +static void do_recv_remaining_completions(int fd, int domain) { int64_t tstop = gettimeofday_ms() + cfg_waittime_ms; while (completions < expected_completions && gettimeofday_ms() < tstop) { - if (do_poll(fd, POLLERR)) - do_recv_completions(fd); + if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR)) + do_recv_completions(fd, domain); } if (completions < expected_completions) @@ -438,6 +531,13 @@ static void do_tx(int domain, int type, int protocol) msg.msg_iovlen++; } + if (domain == PF_RDS) { + msg.msg_name = &cfg_dst_addr; + msg.msg_namelen = (cfg_dst_addr.ss_family == AF_INET ? + sizeof(struct sockaddr_in) : + sizeof(struct sockaddr_in6)); + } + iov[2].iov_base = payload; iov[2].iov_len = cfg_payload_len; msg.msg_iovlen++; @@ -448,17 +548,17 @@ static void do_tx(int domain, int type, int protocol) if (cfg_cork) do_sendmsg_corked(fd, &msg); else - do_sendmsg(fd, &msg, cfg_zerocopy); + do_sendmsg(fd, &msg, cfg_zerocopy, domain); while (!do_poll(fd, POLLOUT)) { if (cfg_zerocopy) - do_recv_completions(fd); + do_recv_completions(fd, domain); } } while (gettimeofday_ms() < tstop); if (cfg_zerocopy) - do_recv_remaining_completions(fd); + do_recv_remaining_completions(fd, domain); if (close(fd)) error(1, errno, "close"); @@ -603,6 +703,8 @@ static void parse_opts(int argc, char **argv) sizeof(struct tcphdr) - 40 /* max tcp options */; int c; + char *daddr = NULL, *saddr = NULL; + char *cfg_test; cfg_payload_len = max_payload_len; @@ -627,7 +729,7 @@ static void parse_opts(int argc, char **argv) cfg_cpu = strtol(optarg, NULL, 0); break; case 'D': - setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); + daddr = optarg; break; case 'i': cfg_ifindex = if_nametoindex(optarg); @@ -638,7 +740,7 @@ static void parse_opts(int argc, char **argv) cfg_cork_mixed = true; break; case 'p': - cfg_port = htons(strtoul(optarg, NULL, 0)); + cfg_port = strtoul(optarg, NULL, 0); break; case 'r': cfg_rx = true; @@ -647,7 +749,7 @@ static void parse_opts(int argc, char **argv) cfg_payload_len = strtoul(optarg, NULL, 0); break; case 'S': - setup_sockaddr(cfg_family, optarg, &cfg_src_addr); + saddr = optarg; break; case 't': cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000; @@ -661,6 +763,16 @@ static void parse_opts(int argc, char **argv) } } + cfg_test = argv[argc - 1]; + if (strcmp(cfg_test, "rds") == 0) { + if (!daddr) + error(1, 0, "-D <server addr> required for PF_RDS\n"); + if (!cfg_rx && !saddr) + error(1, 0, "-S <client addr> required for PF_RDS\n"); + } + setup_sockaddr(cfg_family, daddr, &cfg_dst_addr); + setup_sockaddr(cfg_family, saddr, &cfg_src_addr); + if (cfg_payload_len > max_payload_len) error(1, 0, "-s: payload exceeds max (%d)", max_payload_len); if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork)) @@ -690,6 +802,8 @@ int main(int argc, char **argv) do_test(cfg_family, SOCK_STREAM, 0); else if (!strcmp(cfg_test, "udp")) do_test(cfg_family, SOCK_DGRAM, 0); + else if (!strcmp(cfg_test, "rds")) + do_test(PF_RDS, SOCK_SEQPACKET, 0); else error(1, 0, "unknown cfg_test %s", cfg_test); diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh new file mode 100755 index 000000000000..1e428781a625 --- /dev/null +++ b/tools/testing/selftests/net/pmtu.sh @@ -0,0 +1,471 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Check that route PMTU values match expectations, and that initial device MTU +# values are assigned correctly +# +# Tests currently implemented: +# +# - pmtu_vti4_exception +# Set up vti tunnel on top of veth, with xfrm states and policies, in two +# namespaces with matching endpoints. Check that route exception is not +# created if link layer MTU is not exceeded, then exceed it and check that +# exception is created with the expected PMTU. The approach described +# below for IPv6 doesn't apply here, because, on IPv4, administrative MTU +# changes alone won't affect PMTU +# +# - pmtu_vti6_exception +# Set up vti6 tunnel on top of veth, with xfrm states and policies, in two +# namespaces with matching endpoints. Check that route exception is +# created by exceeding link layer MTU with ping to other endpoint. Then +# decrease and increase MTU of tunnel, checking that route exception PMTU +# changes accordingly +# +# - pmtu_vti4_default_mtu +# Set up vti4 tunnel on top of veth, in two namespaces with matching +# endpoints. Check that MTU assigned to vti interface is the MTU of the +# lower layer (veth) minus additional lower layer headers (zero, for veth) +# minus IPv4 header length +# +# - pmtu_vti6_default_mtu +# Same as above, for IPv6 +# +# - pmtu_vti4_link_add_mtu +# Set up vti4 interface passing MTU value at link creation, check MTU is +# configured, and that link is not created with invalid MTU values +# +# - pmtu_vti6_link_add_mtu +# Same as above, for IPv6 +# +# - pmtu_vti6_link_change_mtu +# Set up two dummy interfaces with different MTUs, create a vti6 tunnel +# and check that configured MTU is used on link creation and changes, and +# that MTU is properly calculated instead when MTU is not configured from +# userspace + +tests=" + pmtu_vti6_exception vti6: PMTU exceptions + pmtu_vti4_exception vti4: PMTU exceptions + pmtu_vti4_default_mtu vti4: default MTU assignment + pmtu_vti6_default_mtu vti6: default MTU assignment + pmtu_vti4_link_add_mtu vti4: MTU setting on link creation + pmtu_vti6_link_add_mtu vti6: MTU setting on link creation + pmtu_vti6_link_change_mtu vti6: MTU changes on link changes" + +NS_A="ns-$(mktemp -u XXXXXX)" +NS_B="ns-$(mktemp -u XXXXXX)" +ns_a="ip netns exec ${NS_A}" +ns_b="ip netns exec ${NS_B}" + +veth4_a_addr="192.168.1.1" +veth4_b_addr="192.168.1.2" +veth4_mask="24" +veth6_a_addr="fd00:1::a" +veth6_b_addr="fd00:1::b" +veth6_mask="64" + +vti4_a_addr="192.168.2.1" +vti4_b_addr="192.168.2.2" +vti4_mask="24" +vti6_a_addr="fd00:2::a" +vti6_b_addr="fd00:2::b" +vti6_mask="64" + +dummy6_0_addr="fc00:1000::0" +dummy6_1_addr="fc00:1001::0" +dummy6_mask="64" + +cleanup_done=1 +err_buf= + +err() { + err_buf="${err_buf}${1} +" +} + +err_flush() { + echo -n "${err_buf}" + err_buf= +} + +setup_namespaces() { + ip netns add ${NS_A} || return 1 + ip netns add ${NS_B} +} + +setup_veth() { + ${ns_a} ip link add veth_a type veth peer name veth_b || return 1 + ${ns_a} ip link set veth_b netns ${NS_B} + + ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a + ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b + + ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a + ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b + + ${ns_a} ip link set veth_a up + ${ns_b} ip link set veth_b up +} + +setup_vti() { + proto=${1} + veth_a_addr="${2}" + veth_b_addr="${3}" + vti_a_addr="${4}" + vti_b_addr="${5}" + vti_mask=${6} + + [ ${proto} -eq 6 ] && vti_type="vti6" || vti_type="vti" + + ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1 + ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10 + + ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a + ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b + + ${ns_a} ip link set vti${proto}_a up + ${ns_b} ip link set vti${proto}_b up + + sleep 1 +} + +setup_vti4() { + setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${vti4_a_addr} ${vti4_b_addr} ${vti4_mask} +} + +setup_vti6() { + setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${vti6_a_addr} ${vti6_b_addr} ${vti6_mask} +} + +setup_xfrm() { + proto=${1} + veth_a_addr="${2}" + veth_b_addr="${3}" + + ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1 + ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel + ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel + + ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel + ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel +} + +setup_xfrm4() { + setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} +} + +setup_xfrm6() { + setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} +} + +setup() { + [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return 1 + + cleanup_done=0 + for arg do + eval setup_${arg} || { echo " ${arg} not supported"; return 1; } + done +} + +cleanup() { + [ ${cleanup_done} -eq 1 ] && return + ip netns del ${NS_A} 2 > /dev/null + ip netns del ${NS_B} 2 > /dev/null + cleanup_done=1 +} + +mtu() { + ns_cmd="${1}" + dev="${2}" + mtu="${3}" + + ${ns_cmd} ip link set dev ${dev} mtu ${mtu} +} + +mtu_parse() { + input="${1}" + + next=0 + for i in ${input}; do + [ ${next} -eq 1 ] && echo "${i}" && return + [ "${i}" = "mtu" ] && next=1 + done +} + +link_get() { + ns_cmd="${1}" + name="${2}" + + ${ns_cmd} ip link show dev "${name}" +} + +link_get_mtu() { + ns_cmd="${1}" + name="${2}" + + mtu_parse "$(link_get "${ns_cmd}" ${name})" +} + +route_get_dst_exception() { + ns_cmd="${1}" + dst="${2}" + + ${ns_cmd} ip route get "${dst}" +} + +route_get_dst_pmtu_from_exception() { + ns_cmd="${1}" + dst="${2}" + + mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})" +} + +test_pmtu_vti4_exception() { + setup namespaces veth vti4 xfrm4 || return 2 + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 20)) + + # SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 28)) + + mtu "${ns_a}" veth_a ${veth_mtu} + mtu "${ns_b}" veth_b ${veth_mtu} + mtu "${ns_a}" vti4_a ${vti_mtu} + mtu "${ns_b}" vti4_b ${vti_mtu} + + # Send DF packet without exceeding link layer MTU, check that no + # exception is created + ${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})" + if [ "${pmtu}" != "" ]; then + err " unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}" + return 1 + fi + + # Now exceed link layer MTU by one byte, check that exception is created + ${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})" + if [ "${pmtu}" = "" ]; then + err " exception not created for IP payload length $((esp_payload_rfc4106 + 1))" + return 1 + fi + + # ...with the right PMTU value + if [ ${pmtu} -ne ${esp_payload_rfc4106} ]; then + err " wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}" + return 1 + fi +} + +test_pmtu_vti6_exception() { + setup namespaces veth vti6 xfrm6 || return 2 + fail=0 + + # Create route exception by exceeding link layer MTU + mtu "${ns_a}" veth_a 4000 + mtu "${ns_b}" veth_b 4000 + mtu "${ns_a}" vti6_a 5000 + mtu "${ns_b}" vti6_b 5000 + ${ns_a} ping6 -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null + + # Check that exception was created + if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then + err " tunnel exceeding link layer MTU didn't create route exception" + return 1 + fi + + # Decrease tunnel MTU, check for PMTU decrease in route exception + mtu "${ns_a}" vti6_a 3000 + + if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 3000 ]; then + err " decreasing tunnel MTU didn't decrease route exception PMTU" + fail=1 + fi + + # Increase tunnel MTU, check for PMTU increase in route exception + mtu "${ns_a}" vti6_a 9000 + if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 9000 ]; then + err " increasing tunnel MTU didn't increase route exception PMTU" + fail=1 + fi + + return ${fail} +} + +test_pmtu_vti4_default_mtu() { + setup namespaces veth vti4 || return 2 + + # Check that MTU of vti device is MTU of veth minus IPv4 header length + veth_mtu="$(link_get_mtu "${ns_a}" veth_a)" + vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)" + if [ $((veth_mtu - vti4_mtu)) -ne 20 ]; then + err " vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length" + return 1 + fi +} + +test_pmtu_vti6_default_mtu() { + setup namespaces veth vti6 || return 2 + + # Check that MTU of vti device is MTU of veth minus IPv6 header length + veth_mtu="$(link_get_mtu "${ns_a}" veth_a)" + vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)" + if [ $((veth_mtu - vti6_mtu)) -ne 40 ]; then + err " vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length" + return 1 + fi +} + +test_pmtu_vti4_link_add_mtu() { + setup namespaces || return 2 + + ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 + [ $? -ne 0 ] && err " vti not supported" && return 2 + ${ns_a} ip link del vti4_a + + fail=0 + + min=68 + max=$((65528 - 20)) + # Check invalid values first + for v in $((min - 1)) $((max + 1)); do + ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null + # This can fail, or MTU can be adjusted to a proper value + [ $? -ne 0 ] && continue + mtu="$(link_get_mtu "${ns_a}" vti4_a)" + if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then + err " vti tunnel created with invalid MTU ${mtu}" + fail=1 + fi + ${ns_a} ip link del vti4_a + done + + # Now check valid values + for v in ${min} 1300 ${max}; do + ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 + mtu="$(link_get_mtu "${ns_a}" vti4_a)" + ${ns_a} ip link del vti4_a + if [ "${mtu}" != "${v}" ]; then + err " vti MTU ${mtu} doesn't match configured value ${v}" + fail=1 + fi + done + + return ${fail} +} + +test_pmtu_vti6_link_add_mtu() { + setup namespaces || return 2 + + ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 + [ $? -ne 0 ] && err " vti6 not supported" && return 2 + ${ns_a} ip link del vti6_a + + fail=0 + + min=1280 + max=$((65535 - 40)) + # Check invalid values first + for v in $((min - 1)) $((max + 1)); do + ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 2>/dev/null + # This can fail, or MTU can be adjusted to a proper value + [ $? -ne 0 ] && continue + mtu="$(link_get_mtu "${ns_a}" vti6_a)" + if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then + err " vti6 tunnel created with invalid MTU ${v}" + fail=1 + fi + ${ns_a} ip link del vti6_a + done + + # Now check valid values + for v in 1280 1300 $((65535 - 40)); do + ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 + mtu="$(link_get_mtu "${ns_a}" vti6_a)" + ${ns_a} ip link del vti6_a + if [ "${mtu}" != "${v}" ]; then + err " vti6 MTU ${mtu} doesn't match configured value ${v}" + fail=1 + fi + done + + return ${fail} +} + +test_pmtu_vti6_link_change_mtu() { + setup namespaces || return 2 + + ${ns_a} ip link add dummy0 mtu 1500 type dummy + [ $? -ne 0 ] && err " dummy not supported" && return 2 + ${ns_a} ip link add dummy1 mtu 3000 type dummy + ${ns_a} ip link set dummy0 up + ${ns_a} ip link set dummy1 up + + ${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0 + ${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1 + + fail=0 + + # Create vti6 interface bound to device, passing MTU, check it + ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr} + mtu="$(link_get_mtu "${ns_a}" vti6_a)" + if [ ${mtu} -ne 1300 ]; then + err " vti6 MTU ${mtu} doesn't match configured value 1300" + fail=1 + fi + + # Move to another device with different MTU, without passing MTU, check + # MTU is adjusted + ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr} + mtu="$(link_get_mtu "${ns_a}" vti6_a)" + if [ ${mtu} -ne $((3000 - 40)) ]; then + err " vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length" + fail=1 + fi + + # Move it back, passing MTU, check MTU is not overridden + ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr} + mtu="$(link_get_mtu "${ns_a}" vti6_a)" + if [ ${mtu} -ne 1280 ]; then + err " vti6 MTU ${mtu} doesn't match configured value 1280" + fail=1 + fi + + return ${fail} +} + +trap cleanup EXIT + +exitcode=0 +desc=0 +IFS=" +" +for t in ${tests}; do + [ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0 + + ( + unset IFS + eval test_${name} + ret=$? + cleanup + + if [ $ret -eq 0 ]; then + printf "TEST: %-60s [ OK ]\n" "${t}" + elif [ $ret -eq 1 ]; then + printf "TEST: %-60s [FAIL]\n" "${t}" + err_flush + exit 1 + elif [ $ret -eq 2 ]; then + printf "TEST: %-60s [SKIP]\n" "${t}" + err_flush + fi + ) + [ $? -ne 0 ] && exitcode=1 +done + +exit ${exitcode} diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 989f917068d1..bd9b9632c72b 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -50,6 +50,7 @@ #include <linux/filter.h> #include <linux/bpf.h> #include <linux/if_packet.h> +#include <net/if.h> #include <net/ethernet.h> #include <netinet/ip.h> #include <netinet/udp.h> @@ -73,14 +74,29 @@ * @return -1 if mode is bad, a valid socket otherwise */ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) { + struct sockaddr_ll addr = {0}; int fd, val; - fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP)); + fd = socket(PF_PACKET, SOCK_RAW, 0); if (fd < 0) { perror("socket packet"); exit(1); } + pair_udp_setfilter(fd); + + addr.sll_family = AF_PACKET; + addr.sll_protocol = htons(ETH_P_IP); + addr.sll_ifindex = if_nametoindex("lo"); + if (addr.sll_ifindex == 0) { + perror("if_nametoindex"); + exit(1); + } + if (bind(fd, (void *) &addr, sizeof(addr))) { + perror("bind packet"); + exit(1); + } + val = (((int) typeflags) << 16) | group_id; if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) { if (close(fd)) { @@ -90,7 +106,6 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) return -1; } - pair_udp_setfilter(fd); return fd; } @@ -128,6 +143,8 @@ static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id) static void sock_fanout_set_ebpf(int fd) { + static char log_buf[65536]; + const int len_off = __builtin_offsetof(struct __sk_buff, len); struct bpf_insn prog[] = { { BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 }, @@ -140,7 +157,6 @@ static void sock_fanout_set_ebpf(int fd) { BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 }, { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 } }; - char log_buf[512]; union bpf_attr attr; int pfd; @@ -228,7 +244,7 @@ static int sock_fanout_read(int fds[], char *rings[], const int expect[]) if ((!(ret[0] == expect[0] && ret[1] == expect[1])) && (!(ret[0] == expect[1] && ret[1] == expect[0]))) { - fprintf(stderr, "ERROR: incorrect queue lengths\n"); + fprintf(stderr, "warning: incorrect queue lengths\n"); return 1; } @@ -347,7 +363,8 @@ static int test_datapath(uint16_t typeflags, int port_off, uint8_t type = typeflags & 0xFF; int fds[2], fds_udp[2][2], ret; - fprintf(stderr, "test: datapath 0x%hx\n", typeflags); + fprintf(stderr, "\ntest: datapath 0x%hx ports %hu,%hu\n", + typeflags, PORT_BASE, PORT_BASE + port_off); fds[0] = sock_fanout_open(typeflags, 0); fds[1] = sock_fanout_open(typeflags, 0); @@ -418,7 +435,7 @@ int main(int argc, char **argv) const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } }; const int expect_bpf[2][2] = { { 15, 5 }, { 15, 20 } }; const int expect_uniqueid[2][2] = { { 20, 20}, { 20, 20 } }; - int port_off = 2, tries = 5, ret; + int port_off = 2, tries = 20, ret; test_control_single(); test_control_group(); @@ -427,10 +444,14 @@ int main(int argc, char **argv) /* find a set of ports that do not collide onto the same socket */ ret = test_datapath(PACKET_FANOUT_HASH, port_off, expect_hash[0], expect_hash[1]); - while (ret && tries--) { + while (ret) { fprintf(stderr, "info: trying alternate ports (%d)\n", tries); ret = test_datapath(PACKET_FANOUT_HASH, ++port_off, expect_hash[0], expect_hash[1]); + if (!--tries) { + fprintf(stderr, "too many collisions\n"); + return 1; + } } ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER, diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index 4a8217448f20..cad14cd0ea92 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -21,6 +21,7 @@ #include <sys/epoll.h> #include <sys/types.h> #include <sys/socket.h> +#include <sys/resource.h> #include <unistd.h> #ifndef ARRAY_SIZE @@ -190,11 +191,14 @@ static void send_from(struct test_params p, uint16_t sport, char *buf, struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport); struct sockaddr * const daddr = new_loopback_sockaddr(p.send_family, p.recv_port); - const int fd = socket(p.send_family, p.protocol, 0); + const int fd = socket(p.send_family, p.protocol, 0), one = 1; if (fd < 0) error(1, errno, "failed to create send socket"); + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) + error(1, errno, "failed to set reuseaddr"); + if (bind(fd, saddr, sockaddr_size())) error(1, errno, "failed to bind send socket"); @@ -433,6 +437,21 @@ void enable_fastopen(void) } } +static struct rlimit rlim_old, rlim_new; + +static __attribute__((constructor)) void main_ctor(void) +{ + getrlimit(RLIMIT_MEMLOCK, &rlim_old); + rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20); + rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20); + setrlimit(RLIMIT_MEMLOCK, &rlim_new); +} + +static __attribute__((destructor)) void main_dtor(void) +{ + setrlimit(RLIMIT_MEMLOCK, &rlim_old); +} + int main(void) { fprintf(stderr, "---- IPv4 UDP ----\n"); diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 5215493166c9..e6f485235435 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -502,6 +502,237 @@ kci_test_macsec() echo "PASS: macsec" } +kci_test_gretap() +{ + testns="testns" + DEV_NS=gretap00 + ret=0 + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP gretap tests: cannot add net namespace $testns" + return 1 + fi + + ip link help gretap 2>&1 | grep -q "^Usage:" + if [ $? -ne 0 ];then + echo "SKIP: gretap: iproute2 too old" + ip netns del "$testns" + return 1 + fi + + # test native tunnel + ip netns exec "$testns" ip link add dev "$DEV_NS" type gretap seq \ + key 102 local 172.16.1.100 remote 172.16.1.200 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test external mode + ip netns exec "$testns" ip link add dev "$DEV_NS" type gretap external + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: gretap" + ip netns del "$testns" + return 1 + fi + echo "PASS: gretap" + + ip netns del "$testns" +} + +kci_test_ip6gretap() +{ + testns="testns" + DEV_NS=ip6gretap00 + ret=0 + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP ip6gretap tests: cannot add net namespace $testns" + return 1 + fi + + ip link help ip6gretap 2>&1 | grep -q "^Usage:" + if [ $? -ne 0 ];then + echo "SKIP: ip6gretap: iproute2 too old" + ip netns del "$testns" + return 1 + fi + + # test native tunnel + ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6gretap seq \ + key 102 local fc00:100::1 remote fc00:100::2 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" fc00:200::1/96 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test external mode + ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6gretap external + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: ip6gretap" + ip netns del "$testns" + return 1 + fi + echo "PASS: ip6gretap" + + ip netns del "$testns" +} + +kci_test_erspan() +{ + testns="testns" + DEV_NS=erspan00 + ret=0 + + ip link help erspan 2>&1 | grep -q "^Usage:" + if [ $? -ne 0 ];then + echo "SKIP: erspan: iproute2 too old" + return 1 + fi + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP erspan tests: cannot add net namespace $testns" + return 1 + fi + + # test native tunnel erspan v1 + ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan seq \ + key 102 local 172.16.1.100 remote 172.16.1.200 \ + erspan_ver 1 erspan 488 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test native tunnel erspan v2 + ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan seq \ + key 102 local 172.16.1.100 remote 172.16.1.200 \ + erspan_ver 2 erspan_dir ingress erspan_hwid 7 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test external mode + ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan external + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: erspan" + ip netns del "$testns" + return 1 + fi + echo "PASS: erspan" + + ip netns del "$testns" +} + +kci_test_ip6erspan() +{ + testns="testns" + DEV_NS=ip6erspan00 + ret=0 + + ip link help ip6erspan 2>&1 | grep -q "^Usage:" + if [ $? -ne 0 ];then + echo "SKIP: ip6erspan: iproute2 too old" + return 1 + fi + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP ip6erspan tests: cannot add net namespace $testns" + return 1 + fi + + # test native tunnel ip6erspan v1 + ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6erspan seq \ + key 102 local fc00:100::1 remote fc00:100::2 \ + erspan_ver 1 erspan 488 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test native tunnel ip6erspan v2 + ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6erspan seq \ + key 102 local fc00:100::1 remote fc00:100::2 \ + erspan_ver 2 erspan_dir ingress erspan_hwid 7 + check_err $? + + ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24 + check_err $? + + ip netns exec "$testns" ip link set dev $DEV_NS up + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + # test external mode + ip netns exec "$testns" ip link add dev "$DEV_NS" \ + type ip6erspan external + check_err $? + + ip netns exec "$testns" ip link del "$DEV_NS" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: ip6erspan" + ip netns del "$testns" + return 1 + fi + echo "PASS: ip6erspan" + + ip netns del "$testns" +} + kci_test_rtnl() { kci_add_dummy @@ -514,6 +745,10 @@ kci_test_rtnl() kci_test_route_get kci_test_tc kci_test_gre + kci_test_gretap + kci_test_ip6gretap + kci_test_erspan + kci_test_ip6erspan kci_test_bridge kci_test_addrlabel kci_test_ifalias diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests index 21fe149e3de1..bea079edc278 100755 --- a/tools/testing/selftests/net/run_afpackettests +++ b/tools/testing/selftests/net/run_afpackettests @@ -9,7 +9,7 @@ fi echo "--------------------" echo "running psock_fanout test" echo "--------------------" -./psock_fanout +./in_netns.sh ./psock_fanout if [ $? -ne 0 ]; then echo "[FAIL]" else @@ -19,7 +19,7 @@ fi echo "--------------------" echo "running psock_tpacket test" echo "--------------------" -./psock_tpacket +./in_netns.sh ./psock_tpacket if [ $? -ne 0 ]; then echo "[FAIL]" else diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c index 5df07047ca86..81a98a240456 100644 --- a/tools/testing/selftests/networking/timestamping/txtimestamp.c +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c @@ -68,9 +68,11 @@ static int cfg_num_pkts = 4; static int do_ipv4 = 1; static int do_ipv6 = 1; static int cfg_payload_len = 10; +static int cfg_poll_timeout = 100; static bool cfg_show_payload; static bool cfg_do_pktinfo; static bool cfg_loop_nodata; +static bool cfg_no_delay; static uint16_t dest_port = 9000; static struct sockaddr_in daddr; @@ -171,7 +173,7 @@ static void __poll(int fd) memset(&pollfd, 0, sizeof(pollfd)); pollfd.fd = fd; - ret = poll(&pollfd, 1, 100); + ret = poll(&pollfd, 1, cfg_poll_timeout); if (ret != 1) error(1, errno, "poll"); } @@ -371,7 +373,8 @@ static void do_test(int family, unsigned int opt) error(1, errno, "send"); /* wait for all errors to be queued, else ACKs arrive OOO */ - usleep(50 * 1000); + if (!cfg_no_delay) + usleep(50 * 1000); __poll(fd); @@ -392,6 +395,9 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -4: only IPv4\n" " -6: only IPv6\n" " -h: show this message\n" + " -c N: number of packets for each test\n" + " -D: no delay between packets\n" + " -F: poll() waits forever for an event\n" " -I: request PKTINFO\n" " -l N: send N bytes at a time\n" " -n: set no-payload option\n" @@ -409,7 +415,7 @@ static void parse_opt(int argc, char **argv) int proto_count = 0; char c; - while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) { + while ((c = getopt(argc, argv, "46c:DFhIl:np:rRux")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -417,6 +423,15 @@ static void parse_opt(int argc, char **argv) case '6': do_ipv4 = 0; break; + case 'c': + cfg_num_pkts = strtoul(optarg, NULL, 10); + break; + case 'D': + cfg_no_delay = true; + break; + case 'F': + cfg_poll_timeout = -1; + break; case 'I': cfg_do_pktinfo = true; break; diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/nsfs/pidns.c index 1182d4e437a2..e0d86e1668c0 100644 --- a/tools/testing/selftests/nsfs/pidns.c +++ b/tools/testing/selftests/nsfs/pidns.c @@ -70,7 +70,7 @@ int main(int argc, char *argv[]) return pr_err("NS_GET_PARENT returned a wrong namespace"); if (ioctl(pns, NS_GET_PARENT) >= 0 || errno != EPERM) - return pr_err("Don't get EPERM");; + return pr_err("Don't get EPERM"); } kill(pid, SIGKILL); diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh index 5fc7ad359e21..08cbfbbc7029 100755 --- a/tools/testing/selftests/ntb/ntb_test.sh +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -18,7 +18,6 @@ LIST_DEVS=FALSE DEBUGFS=${DEBUGFS-/sys/kernel/debug} -DB_BITMASK=0x7FFF PERF_RUN_ORDER=32 MAX_MW_SIZE=0 RUN_DMA_TESTS= @@ -39,15 +38,17 @@ function show_help() echo "be highly recommended." echo echo "Options:" - echo " -b BITMASK doorbell clear bitmask for ntb_tool" echo " -C don't cleanup ntb modules on exit" - echo " -d run dma tests" echo " -h show this help message" echo " -l list available local and remote PCI ids" echo " -r REMOTE_HOST specify the remote's hostname to connect" - echo " to for the test (using ssh)" - echo " -p NUM ntb_perf run order (default: $PERF_RUN_ORDER)" - echo " -w max_mw_size maxmium memory window size" + echo " to for the test (using ssh)" + echo " -m MW_SIZE memory window size for ntb_tool" + echo " (default: $MW_SIZE)" + echo " -d run dma tests for ntb_perf" + echo " -p ORDER total data order for ntb_perf" + echo " (default: $PERF_RUN_ORDER)" + echo " -w MAX_MW_SIZE maxmium memory window size for ntb_perf" echo } @@ -56,7 +57,6 @@ function parse_args() OPTIND=0 while getopts "b:Cdhlm:r:p:w:" opt; do case "$opt" in - b) DB_BITMASK=${OPTARG} ;; C) DONT_CLEANUP=1 ;; d) RUN_DMA_TESTS=1 ;; h) show_help; exit 0 ;; @@ -87,7 +87,7 @@ set -e function _modprobe() { - modprobe "$@" + modprobe "$@" if [[ "$REMOTE_HOST" != "" ]]; then ssh "$REMOTE_HOST" modprobe "$@" @@ -127,15 +127,70 @@ function write_file() fi } +function check_file() +{ + split_remote $1 + + if [[ "$REMOTE" != "" ]]; then + ssh "$REMOTE" "[[ -e ${VPATH} ]]" + else + [[ -e ${VPATH} ]] + fi +} + +function subdirname() +{ + echo $(basename $(dirname $1)) 2> /dev/null +} + +function find_pidx() +{ + PORT=$1 + PPATH=$2 + + for ((i = 0; i < 64; i++)); do + PEER_DIR="$PPATH/peer$i" + + check_file ${PEER_DIR} || break + + PEER_PORT=$(read_file "${PEER_DIR}/port") + if [[ ${PORT} -eq $PEER_PORT ]]; then + echo $i + return 0 + fi + done + + return 1 +} + +function port_test() +{ + LOC=$1 + REM=$2 + + echo "Running port tests on: $(basename $LOC) / $(basename $REM)" + + LOCAL_PORT=$(read_file "$LOC/port") + REMOTE_PORT=$(read_file "$REM/port") + + LOCAL_PIDX=$(find_pidx ${REMOTE_PORT} "$LOC") + REMOTE_PIDX=$(find_pidx ${LOCAL_PORT} "$REM") + + echo "Local port ${LOCAL_PORT} with index ${REMOTE_PIDX} on remote host" + echo "Peer port ${REMOTE_PORT} with index ${LOCAL_PIDX} on local host" + + echo " Passed" +} + function link_test() { LOC=$1 REM=$2 EXP=0 - echo "Running link tests on: $(basename $LOC) / $(basename $REM)" + echo "Running link tests on: $(subdirname $LOC) / $(subdirname $REM)" - if ! write_file "N" "$LOC/link" 2> /dev/null; then + if ! write_file "N" "$LOC/../link" 2> /dev/null; then echo " Unsupported" return fi @@ -143,12 +198,11 @@ function link_test() write_file "N" "$LOC/link_event" if [[ $(read_file "$REM/link") != "N" ]]; then - echo "Expected remote link to be down in $REM/link" >&2 + echo "Expected link to be down in $REM/link" >&2 exit -1 fi - write_file "Y" "$LOC/link" - write_file "Y" "$LOC/link_event" + write_file "Y" "$LOC/../link" echo " Passed" } @@ -161,58 +215,136 @@ function doorbell_test() echo "Running db tests on: $(basename $LOC) / $(basename $REM)" - write_file "c $DB_BITMASK" "$REM/db" + DB_VALID_MASK=$(read_file "$LOC/db_valid_mask") + + write_file "c $DB_VALID_MASK" "$REM/db" - for ((i=1; i <= 8; i++)); do - let DB=$(read_file "$REM/db") || true - if [[ "$DB" != "$EXP" ]]; then + for ((i = 0; i < 64; i++)); do + DB=$(read_file "$REM/db") + if [[ "$DB" -ne "$EXP" ]]; then echo "Doorbell doesn't match expected value $EXP " \ "in $REM/db" >&2 exit -1 fi - let "MASK=1 << ($i-1)" || true - let "EXP=$EXP | $MASK" || true + let "MASK = (1 << $i) & $DB_VALID_MASK" || true + let "EXP = $EXP | $MASK" || true + write_file "s $MASK" "$LOC/peer_db" done + write_file "c $DB_VALID_MASK" "$REM/db_mask" + write_file $DB_VALID_MASK "$REM/db_event" + write_file "s $DB_VALID_MASK" "$REM/db_mask" + + write_file "c $DB_VALID_MASK" "$REM/db" + echo " Passed" } -function read_spad() +function get_files_count() { - VPATH=$1 - IDX=$2 + NAME=$1 + LOC=$2 + + split_remote $LOC - ROW=($(read_file "$VPATH" | grep -e "^$IDX")) - let VAL=${ROW[1]} || true - echo $VAL + if [[ "$REMOTE" == "" ]]; then + echo $(ls -1 "$LOC"/${NAME}* 2>/dev/null | wc -l) + else + echo $(ssh "$REMOTE" "ls -1 \"$VPATH\"/${NAME}* | \ + wc -l" 2> /dev/null) + fi } function scratchpad_test() { LOC=$1 REM=$2 - CNT=$(read_file "$LOC/spad" | wc -l) - echo "Running spad tests on: $(basename $LOC) / $(basename $REM)" + echo "Running spad tests on: $(subdirname $LOC) / $(subdirname $REM)" + + CNT=$(get_files_count "spad" "$LOC") + + if [[ $CNT -eq 0 ]]; then + echo " Unsupported" + return + fi for ((i = 0; i < $CNT; i++)); do VAL=$RANDOM - write_file "$i $VAL" "$LOC/peer_spad" - RVAL=$(read_spad "$REM/spad" $i) + write_file "$VAL" "$LOC/spad$i" + RVAL=$(read_file "$REM/../spad$i") - if [[ "$VAL" != "$RVAL" ]]; then - echo "Scratchpad doesn't match expected value $VAL " \ - "in $REM/spad, got $RVAL" >&2 + if [[ "$VAL" -ne "$RVAL" ]]; then + echo "Scratchpad $i value $RVAL doesn't match $VAL" >&2 exit -1 fi + done + + echo " Passed" +} + +function message_test() +{ + LOC=$1 + REM=$2 + + echo "Running msg tests on: $(subdirname $LOC) / $(subdirname $REM)" + + CNT=$(get_files_count "msg" "$LOC") + if [[ $CNT -eq 0 ]]; then + echo " Unsupported" + return + fi + + MSG_OUTBITS_MASK=$(read_file "$LOC/../msg_inbits") + MSG_INBITS_MASK=$(read_file "$REM/../msg_inbits") + + write_file "c $MSG_OUTBITS_MASK" "$LOC/../msg_sts" + write_file "c $MSG_INBITS_MASK" "$REM/../msg_sts" + + for ((i = 0; i < $CNT; i++)); do + VAL=$RANDOM + write_file "$VAL" "$LOC/msg$i" + RVAL=$(read_file "$REM/../msg$i") + + if [[ "$VAL" -ne "${RVAL%%<-*}" ]]; then + echo "Message $i value $RVAL doesn't match $VAL" >&2 + exit -1 + fi done echo " Passed" } +function get_number() +{ + KEY=$1 + + sed -n "s/^\(${KEY}\)[ \t]*\(0x[0-9a-fA-F]*\)\(\[p\]\)\?$/\2/p" +} + +function mw_alloc() +{ + IDX=$1 + LOC=$2 + REM=$3 + + write_file $MW_SIZE "$LOC/mw_trans$IDX" + + INB_MW=$(read_file "$LOC/mw_trans$IDX") + MW_ALIGNED_SIZE=$(echo "$INB_MW" | get_number "Window Size") + MW_DMA_ADDR=$(echo "$INB_MW" | get_number "DMA Address") + + write_file "$MW_DMA_ADDR:$(($MW_ALIGNED_SIZE))" "$REM/peer_mw_trans$IDX" + + if [[ $MW_SIZE -ne $MW_ALIGNED_SIZE ]]; then + echo "MW $IDX size aligned to $MW_ALIGNED_SIZE" + fi +} + function write_mw() { split_remote $2 @@ -225,17 +357,15 @@ function write_mw() fi } -function mw_test() +function mw_check() { IDX=$1 LOC=$2 REM=$3 - echo "Running $IDX tests on: $(basename $LOC) / $(basename $REM)" + write_mw "$LOC/mw$IDX" - write_mw "$LOC/$IDX" - - split_remote "$LOC/$IDX" + split_remote "$LOC/mw$IDX" if [[ "$REMOTE" == "" ]]; then A=$VPATH else @@ -243,7 +373,7 @@ function mw_test() ssh "$REMOTE" cat "$VPATH" > "$A" fi - split_remote "$REM/peer_$IDX" + split_remote "$REM/peer_mw$IDX" if [[ "$REMOTE" == "" ]]; then B=$VPATH else @@ -251,7 +381,7 @@ function mw_test() ssh "$REMOTE" cat "$VPATH" > "$B" fi - cmp -n $MW_SIZE "$A" "$B" + cmp -n $MW_ALIGNED_SIZE "$A" "$B" if [[ $? != 0 ]]; then echo "Memory window $MW did not match!" >&2 fi @@ -263,8 +393,39 @@ function mw_test() if [[ "$B" == "/tmp/*" ]]; then rm "$B" fi +} + +function mw_free() +{ + IDX=$1 + LOC=$2 + REM=$3 + + write_file "$MW_DMA_ADDR:0" "$REM/peer_mw_trans$IDX" + + write_file 0 "$LOC/mw_trans$IDX" +} + +function mw_test() +{ + LOC=$1 + REM=$2 + + CNT=$(get_files_count "mw_trans" "$LOC") + + for ((i = 0; i < $CNT; i++)); do + echo "Running mw$i tests on: $(subdirname $LOC) / " \ + "$(subdirname $REM)" + + mw_alloc $i $LOC $REM + + mw_check $i $LOC $REM + + mw_free $i $LOC $REM + + echo " Passed" + done - echo " Passed" } function pingpong_test() @@ -274,13 +435,13 @@ function pingpong_test() echo "Running ping pong tests on: $(basename $LOC) / $(basename $REM)" - LOC_START=$(read_file $LOC/count) - REM_START=$(read_file $REM/count) + LOC_START=$(read_file "$LOC/count") + REM_START=$(read_file "$REM/count") sleep 7 - LOC_END=$(read_file $LOC/count) - REM_END=$(read_file $REM/count) + LOC_END=$(read_file "$LOC/count") + REM_END=$(read_file "$REM/count") if [[ $LOC_START == $LOC_END ]] || [[ $REM_START == $REM_END ]]; then echo "Ping pong counter not incrementing!" >&2 @@ -300,19 +461,19 @@ function perf_test() WITH="without" fi - _modprobe ntb_perf run_order=$PERF_RUN_ORDER \ + _modprobe ntb_perf total_order=$PERF_RUN_ORDER \ max_mw_size=$MAX_MW_SIZE use_dma=$USE_DMA echo "Running local perf test $WITH DMA" - write_file "" $LOCAL_PERF/run + write_file "$LOCAL_PIDX" "$LOCAL_PERF/run" echo -n " " - read_file $LOCAL_PERF/run + read_file "$LOCAL_PERF/run" echo " Passed" echo "Running remote perf test $WITH DMA" - write_file "" $REMOTE_PERF/run + write_file "$REMOTE_PIDX" "$REMOTE_PERF/run" echo -n " " - read_file $REMOTE_PERF/run + read_file "$REMOTE_PERF/run" echo " Passed" _modprobe -r ntb_perf @@ -320,48 +481,44 @@ function perf_test() function ntb_tool_tests() { - LOCAL_TOOL=$DEBUGFS/ntb_tool/$LOCAL_DEV - REMOTE_TOOL=$REMOTE_HOST:$DEBUGFS/ntb_tool/$REMOTE_DEV + LOCAL_TOOL="$DEBUGFS/ntb_tool/$LOCAL_DEV" + REMOTE_TOOL="$REMOTE_HOST:$DEBUGFS/ntb_tool/$REMOTE_DEV" echo "Starting ntb_tool tests..." _modprobe ntb_tool - write_file Y $LOCAL_TOOL/link_event - write_file Y $REMOTE_TOOL/link_event + port_test "$LOCAL_TOOL" "$REMOTE_TOOL" - link_test $LOCAL_TOOL $REMOTE_TOOL - link_test $REMOTE_TOOL $LOCAL_TOOL + LOCAL_PEER_TOOL="$LOCAL_TOOL/peer$LOCAL_PIDX" + REMOTE_PEER_TOOL="$REMOTE_TOOL/peer$REMOTE_PIDX" + + link_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL" + link_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL" #Ensure the link is up on both sides before continuing - write_file Y $LOCAL_TOOL/link_event - write_file Y $REMOTE_TOOL/link_event + write_file "Y" "$LOCAL_PEER_TOOL/link_event" + write_file "Y" "$REMOTE_PEER_TOOL/link_event" - for PEER_TRANS in $(ls $LOCAL_TOOL/peer_trans*); do - PT=$(basename $PEER_TRANS) - write_file $MW_SIZE $LOCAL_TOOL/$PT - write_file $MW_SIZE $REMOTE_TOOL/$PT - done + doorbell_test "$LOCAL_TOOL" "$REMOTE_TOOL" + doorbell_test "$REMOTE_TOOL" "$LOCAL_TOOL" - doorbell_test $LOCAL_TOOL $REMOTE_TOOL - doorbell_test $REMOTE_TOOL $LOCAL_TOOL - scratchpad_test $LOCAL_TOOL $REMOTE_TOOL - scratchpad_test $REMOTE_TOOL $LOCAL_TOOL + scratchpad_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL" + scratchpad_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL" - for MW in $(ls $LOCAL_TOOL/mw*); do - MW=$(basename $MW) + message_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL" + message_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL" - mw_test $MW $LOCAL_TOOL $REMOTE_TOOL - mw_test $MW $REMOTE_TOOL $LOCAL_TOOL - done + mw_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL" + mw_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL" _modprobe -r ntb_tool } function ntb_pingpong_tests() { - LOCAL_PP=$DEBUGFS/ntb_pingpong/$LOCAL_DEV - REMOTE_PP=$REMOTE_HOST:$DEBUGFS/ntb_pingpong/$REMOTE_DEV + LOCAL_PP="$DEBUGFS/ntb_pingpong/$LOCAL_DEV" + REMOTE_PP="$REMOTE_HOST:$DEBUGFS/ntb_pingpong/$REMOTE_DEV" echo "Starting ntb_pingpong tests..." @@ -374,8 +531,8 @@ function ntb_pingpong_tests() function ntb_perf_tests() { - LOCAL_PERF=$DEBUGFS/ntb_perf/$LOCAL_DEV - REMOTE_PERF=$REMOTE_HOST:$DEBUGFS/ntb_perf/$REMOTE_DEV + LOCAL_PERF="$DEBUGFS/ntb_perf/$LOCAL_DEV" + REMOTE_PERF="$REMOTE_HOST:$DEBUGFS/ntb_perf/$REMOTE_DEV" echo "Starting ntb_perf tests..." diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile index 16b22004e75f..083a48a008b4 100644 --- a/tools/testing/selftests/powerpc/alignment/Makefile +++ b/tools/testing/selftests/powerpc/alignment/Makefile @@ -1,4 +1,5 @@ -TEST_GEN_PROGS := copy_unaligned copy_first_unaligned paste_unaligned paste_last_unaligned +TEST_GEN_PROGS := copy_unaligned copy_first_unaligned paste_unaligned \ + paste_last_unaligned alignment_handler include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c new file mode 100644 index 000000000000..0f2698f9fd6d --- /dev/null +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -0,0 +1,491 @@ +/* + * Test the powerpc alignment handler on POWER8/POWER9 + * + * Copyright (C) 2017 IBM Corporation (Michael Neuling, Andrew Donnellan) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * This selftest exercises the powerpc alignment fault handler. + * + * We create two sets of source and destination buffers, one in regular memory, + * the other cache-inhibited (we use /dev/fb0 for this). + * + * We initialise the source buffers, then use whichever set of load/store + * instructions is under test to copy bytes from the source buffers to the + * destination buffers. For the regular buffers, these instructions will + * execute normally. For the cache-inhibited buffers, these instructions + * will trap and cause an alignment fault, and the alignment fault handler + * will emulate the particular instruction under test. We then compare the + * destination buffers to ensure that the native and emulated cases give the + * same result. + * + * TODO: + * - Any FIXMEs below + * - Test VSX regs < 32 and > 32 + * - Test all loads and stores + * - Check update forms do update register + * - Test alignment faults over page boundary + * + * Some old binutils may not support all the instructions. + */ + + +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <getopt.h> +#include <setjmp.h> +#include <signal.h> + +#include "utils.h" + +int bufsize; +int debug; +int testing; +volatile int gotsig; + +void sighandler(int sig, siginfo_t *info, void *ctx) +{ + ucontext_t *ucp = ctx; + + if (!testing) { + signal(sig, SIG_DFL); + kill(0, sig); + } + gotsig = sig; +#ifdef __powerpc64__ + ucp->uc_mcontext.gp_regs[PT_NIP] += 4; +#else + ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4; +#endif +} + +#define XFORM(reg, n) " " #reg " ,%"#n",%2 ;" +#define DFORM(reg, n) " " #reg " ,0(%"#n") ;" + +#define TEST(name, ld_op, st_op, form, ld_reg, st_reg) \ + void test_##name(char *s, char *d) \ + { \ + asm volatile( \ + #ld_op form(ld_reg, 0) \ + #st_op form(st_reg, 1) \ + :: "r"(s), "r"(d), "r"(0) \ + : "memory", "vs0", "vs32", "r31"); \ + } \ + rc |= do_test(#name, test_##name) + +#define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32) +#define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32) +#define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32) +#define STORE_VSX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 32) +#define LOAD_VMX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 0, 32) +#define STORE_VMX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 0) +#define LOAD_VMX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 0, 32) +#define STORE_VMX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 0) + +#define LOAD_XFORM_TEST(op) TEST(op, op, stdx, XFORM, 31, 31) +#define STORE_XFORM_TEST(op) TEST(op, ldx, op, XFORM, 31, 31) +#define LOAD_DFORM_TEST(op) TEST(op, op, std, DFORM, 31, 31) +#define STORE_DFORM_TEST(op) TEST(op, ld, op, DFORM, 31, 31) + +#define LOAD_FLOAT_DFORM_TEST(op) TEST(op, op, stfd, DFORM, 0, 0) +#define STORE_FLOAT_DFORM_TEST(op) TEST(op, lfd, op, DFORM, 0, 0) +#define LOAD_FLOAT_XFORM_TEST(op) TEST(op, op, stfdx, XFORM, 0, 0) +#define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0) + + +/* FIXME: Unimplemented tests: */ +// STORE_DFORM_TEST(stq) /* FIXME: need two registers for quad */ +// STORE_DFORM_TEST(stswi) /* FIXME: string instruction */ + +// STORE_XFORM_TEST(stwat) /* AMO can't emulate or run on CI */ +// STORE_XFORM_TEST(stdat) /* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */ + + +/* preload byte by byte */ +void preload_data(void *dst, int offset, int width) +{ + char *c = dst; + int i; + + c += offset; + + for (i = 0 ; i < width ; i++) + c[i] = i; +} + +int test_memcpy(void *dst, void *src, int size, int offset, + void (*test_func)(char *, char *)) +{ + char *s, *d; + + s = src; + s += offset; + d = dst; + d += offset; + + assert(size == 16); + gotsig = 0; + testing = 1; + + test_func(s, d); /* run the actual test */ + + testing = 0; + if (gotsig) { + if (debug) + printf(" Got signal %i\n", gotsig); + return 1; + } + return 0; +} + +void dumpdata(char *s1, char *s2, int n, char *test_name) +{ + int i; + + printf(" %s: unexpected result:\n", test_name); + printf(" mem:"); + for (i = 0; i < n; i++) + printf(" %02x", s1[i]); + printf("\n"); + printf(" ci: "); + for (i = 0; i < n; i++) + printf(" %02x", s2[i]); + printf("\n"); +} + +int test_memcmp(void *s1, void *s2, int n, int offset, char *test_name) +{ + char *s1c, *s2c; + + s1c = s1; + s1c += offset; + s2c = s2; + s2c += offset; + + if (memcmp(s1c, s2c, n)) { + if (debug) { + printf("\n Compare failed. Offset:%i length:%i\n", + offset, n); + dumpdata(s1c, s2c, n, test_name); + } + return 1; + } + return 0; +} + +/* + * Do two memcpy tests using the same instructions. One cachable + * memory and the other doesn't. + */ +int do_test(char *test_name, void (*test_func)(char *, char *)) +{ + int offset, width, fd, rc = 0, r; + void *mem0, *mem1, *ci0, *ci1; + + printf("\tDoing %s:\t", test_name); + + fd = open("/dev/fb0", O_RDWR); + if (fd < 0) { + printf("\n"); + perror("Can't open /dev/fb0"); + SKIP_IF(1); + } + + ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED, + fd, 0x0); + ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED, + fd, bufsize); + if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) { + printf("\n"); + perror("mmap failed"); + SKIP_IF(1); + } + + rc = posix_memalign(&mem0, bufsize, bufsize); + if (rc) { + printf("\n"); + return rc; + } + + rc = posix_memalign(&mem1, bufsize, bufsize); + if (rc) { + printf("\n"); + free(mem0); + return rc; + } + + /* offset = 0 no alignment fault, so skip */ + for (offset = 1; offset < 16; offset++) { + width = 16; /* vsx == 16 bytes */ + r = 0; + + /* load pattern into memory byte by byte */ + preload_data(ci0, offset, width); + preload_data(mem0, offset, width); // FIXME: remove?? + memcpy(ci0, mem0, bufsize); + memcpy(ci1, mem1, bufsize); /* initialise output to the same */ + + /* sanity check */ + test_memcmp(mem0, ci0, width, offset, test_name); + + r |= test_memcpy(ci1, ci0, width, offset, test_func); + r |= test_memcpy(mem1, mem0, width, offset, test_func); + if (r && !debug) { + printf("FAILED: Got signal"); + break; + } + + r |= test_memcmp(mem1, ci1, width, offset, test_name); + rc |= r; + if (r && !debug) { + printf("FAILED: Wrong Data"); + break; + } + } + if (!r) + printf("PASSED"); + printf("\n"); + + munmap(ci0, bufsize); + munmap(ci1, bufsize); + free(mem0); + free(mem1); + + return rc; +} + +int test_alignment_handler_vsx_206(void) +{ + int rc = 0; + + printf("VSX: 2.06B\n"); + LOAD_VSX_XFORM_TEST(lxvd2x); + LOAD_VSX_XFORM_TEST(lxvw4x); + LOAD_VSX_XFORM_TEST(lxsdx); + LOAD_VSX_XFORM_TEST(lxvdsx); + STORE_VSX_XFORM_TEST(stxvd2x); + STORE_VSX_XFORM_TEST(stxvw4x); + STORE_VSX_XFORM_TEST(stxsdx); + return rc; +} + +int test_alignment_handler_vsx_207(void) +{ + int rc = 0; + + printf("VSX: 2.07B\n"); + LOAD_VSX_XFORM_TEST(lxsspx); + LOAD_VSX_XFORM_TEST(lxsiwax); + LOAD_VSX_XFORM_TEST(lxsiwzx); + STORE_VSX_XFORM_TEST(stxsspx); + STORE_VSX_XFORM_TEST(stxsiwx); + return rc; +} + +int test_alignment_handler_vsx_300(void) +{ + int rc = 0; + + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); + printf("VSX: 3.00B\n"); + LOAD_VMX_DFORM_TEST(lxsd); + LOAD_VSX_XFORM_TEST(lxsibzx); + LOAD_VSX_XFORM_TEST(lxsihzx); + LOAD_VMX_DFORM_TEST(lxssp); + LOAD_VSX_DFORM_TEST(lxv); + LOAD_VSX_XFORM_TEST(lxvb16x); + LOAD_VSX_XFORM_TEST(lxvh8x); + LOAD_VSX_XFORM_TEST(lxvx); + LOAD_VSX_XFORM_TEST(lxvwsx); + LOAD_VSX_XFORM_TEST(lxvl); + LOAD_VSX_XFORM_TEST(lxvll); + STORE_VMX_DFORM_TEST(stxsd); + STORE_VSX_XFORM_TEST(stxsibx); + STORE_VSX_XFORM_TEST(stxsihx); + STORE_VMX_DFORM_TEST(stxssp); + STORE_VSX_DFORM_TEST(stxv); + STORE_VSX_XFORM_TEST(stxvb16x); + STORE_VSX_XFORM_TEST(stxvh8x); + STORE_VSX_XFORM_TEST(stxvx); + STORE_VSX_XFORM_TEST(stxvl); + STORE_VSX_XFORM_TEST(stxvll); + return rc; +} + +int test_alignment_handler_integer(void) +{ + int rc = 0; + + printf("Integer\n"); + LOAD_DFORM_TEST(lbz); + LOAD_DFORM_TEST(lbzu); + LOAD_XFORM_TEST(lbzx); + LOAD_XFORM_TEST(lbzux); + LOAD_DFORM_TEST(lhz); + LOAD_DFORM_TEST(lhzu); + LOAD_XFORM_TEST(lhzx); + LOAD_XFORM_TEST(lhzux); + LOAD_DFORM_TEST(lha); + LOAD_DFORM_TEST(lhau); + LOAD_XFORM_TEST(lhax); + LOAD_XFORM_TEST(lhaux); + LOAD_XFORM_TEST(lhbrx); + LOAD_DFORM_TEST(lwz); + LOAD_DFORM_TEST(lwzu); + LOAD_XFORM_TEST(lwzx); + LOAD_XFORM_TEST(lwzux); + LOAD_DFORM_TEST(lwa); + LOAD_XFORM_TEST(lwax); + LOAD_XFORM_TEST(lwaux); + LOAD_XFORM_TEST(lwbrx); + LOAD_DFORM_TEST(ld); + LOAD_DFORM_TEST(ldu); + LOAD_XFORM_TEST(ldx); + LOAD_XFORM_TEST(ldux); + LOAD_XFORM_TEST(ldbrx); + LOAD_DFORM_TEST(lmw); + STORE_DFORM_TEST(stb); + STORE_XFORM_TEST(stbx); + STORE_DFORM_TEST(stbu); + STORE_XFORM_TEST(stbux); + STORE_DFORM_TEST(sth); + STORE_XFORM_TEST(sthx); + STORE_DFORM_TEST(sthu); + STORE_XFORM_TEST(sthux); + STORE_XFORM_TEST(sthbrx); + STORE_DFORM_TEST(stw); + STORE_XFORM_TEST(stwx); + STORE_DFORM_TEST(stwu); + STORE_XFORM_TEST(stwux); + STORE_XFORM_TEST(stwbrx); + STORE_DFORM_TEST(std); + STORE_XFORM_TEST(stdx); + STORE_DFORM_TEST(stdu); + STORE_XFORM_TEST(stdux); + STORE_XFORM_TEST(stdbrx); + STORE_DFORM_TEST(stmw); + return rc; +} + +int test_alignment_handler_vmx(void) +{ + int rc = 0; + + printf("VMX\n"); + LOAD_VMX_XFORM_TEST(lvx); + + /* + * FIXME: These loads only load part of the register, so our + * testing method doesn't work. Also they don't take alignment + * faults, so it's kinda pointless anyway + * + LOAD_VMX_XFORM_TEST(lvebx) + LOAD_VMX_XFORM_TEST(lvehx) + LOAD_VMX_XFORM_TEST(lvewx) + LOAD_VMX_XFORM_TEST(lvxl) + */ + STORE_VMX_XFORM_TEST(stvx); + STORE_VMX_XFORM_TEST(stvebx); + STORE_VMX_XFORM_TEST(stvehx); + STORE_VMX_XFORM_TEST(stvewx); + STORE_VMX_XFORM_TEST(stvxl); + return rc; +} + +int test_alignment_handler_fp(void) +{ + int rc = 0; + + printf("Floating point\n"); + LOAD_FLOAT_DFORM_TEST(lfd); + LOAD_FLOAT_XFORM_TEST(lfdx); + LOAD_FLOAT_DFORM_TEST(lfdp); + LOAD_FLOAT_XFORM_TEST(lfdpx); + LOAD_FLOAT_DFORM_TEST(lfdu); + LOAD_FLOAT_XFORM_TEST(lfdux); + LOAD_FLOAT_DFORM_TEST(lfs); + LOAD_FLOAT_XFORM_TEST(lfsx); + LOAD_FLOAT_DFORM_TEST(lfsu); + LOAD_FLOAT_XFORM_TEST(lfsux); + LOAD_FLOAT_XFORM_TEST(lfiwzx); + LOAD_FLOAT_XFORM_TEST(lfiwax); + STORE_FLOAT_DFORM_TEST(stfd); + STORE_FLOAT_XFORM_TEST(stfdx); + STORE_FLOAT_DFORM_TEST(stfdp); + STORE_FLOAT_XFORM_TEST(stfdpx); + STORE_FLOAT_DFORM_TEST(stfdu); + STORE_FLOAT_XFORM_TEST(stfdux); + STORE_FLOAT_DFORM_TEST(stfs); + STORE_FLOAT_XFORM_TEST(stfsx); + STORE_FLOAT_DFORM_TEST(stfsu); + STORE_FLOAT_XFORM_TEST(stfsux); + STORE_FLOAT_XFORM_TEST(stfiwx); + + return rc; +} + +void usage(char *prog) +{ + printf("Usage: %s [options]\n", prog); + printf(" -d Enable debug error output\n"); + printf("\n"); + printf("This test requires a POWER8 or POWER9 CPU and a usable "); + printf("framebuffer at /dev/fb0.\n"); +} + +int main(int argc, char *argv[]) +{ + + struct sigaction sa; + int rc = 0; + int option = 0; + + while ((option = getopt(argc, argv, "d")) != -1) { + switch (option) { + case 'd': + debug++; + break; + default: + usage(argv[0]); + exit(1); + } + } + + bufsize = getpagesize(); + + sa.sa_sigaction = sighandler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL) == -1 + || sigaction(SIGBUS, &sa, NULL) == -1 + || sigaction(SIGILL, &sa, NULL) == -1) { + perror("sigaction"); + exit(1); + } + + rc |= test_harness(test_alignment_handler_vsx_206, + "test_alignment_handler_vsx_206"); + rc |= test_harness(test_alignment_handler_vsx_207, + "test_alignment_handler_vsx_207"); + rc |= test_harness(test_alignment_handler_vsx_300, + "test_alignment_handler_vsx_300"); + rc |= test_harness(test_alignment_handler_integer, + "test_alignment_handler_integer"); + rc |= test_harness(test_alignment_handler_vmx, + "test_alignment_handler_vmx"); + rc |= test_harness(test_alignment_handler_fp, + "test_alignment_handler_fp"); + return rc; +} diff --git a/tools/testing/selftests/powerpc/benchmarks/.gitignore b/tools/testing/selftests/powerpc/benchmarks/.gitignore index 04dc1e6ef2ce..9161679b1e1a 100644 --- a/tools/testing/selftests/powerpc/benchmarks/.gitignore +++ b/tools/testing/selftests/powerpc/benchmarks/.gitignore @@ -1,5 +1,7 @@ gettimeofday context_switch +fork +exec_target mmap_bench futex_bench null_syscall diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile index a35058e3766c..b4d7432a0ecd 100644 --- a/tools/testing/selftests/powerpc/benchmarks/Makefile +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_GEN_PROGS := gettimeofday context_switch mmap_bench futex_bench null_syscall +TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench null_syscall +TEST_GEN_FILES := exec_target CFLAGS += -O2 @@ -10,3 +11,7 @@ $(TEST_GEN_PROGS): ../harness.c $(OUTPUT)/context_switch: ../utils.c $(OUTPUT)/context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec $(OUTPUT)/context_switch: LDLIBS += -lpthread + +$(OUTPUT)/fork: LDLIBS += -lpthread + +$(OUTPUT)/exec_target: CFLAGS += -static -nostartfiles diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c new file mode 100644 index 000000000000..3c9c144192be --- /dev/null +++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Part of fork context switch microbenchmark. + * + * Copyright 2018, Anton Blanchard, IBM Corp. + */ + +void _exit(int); +void _start(void) +{ + _exit(0); +} diff --git a/tools/testing/selftests/powerpc/benchmarks/fork.c b/tools/testing/selftests/powerpc/benchmarks/fork.c new file mode 100644 index 000000000000..d312e638cb37 --- /dev/null +++ b/tools/testing/selftests/powerpc/benchmarks/fork.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Context switch microbenchmark. + * + * Copyright 2018, Anton Blanchard, IBM Corp. + */ + +#define _GNU_SOURCE +#include <assert.h> +#include <errno.h> +#include <getopt.h> +#include <limits.h> +#include <linux/futex.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/shm.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +static unsigned int timeout = 30; + +static void set_cpu(int cpu) +{ + cpu_set_t cpuset; + + if (cpu == -1) + return; + + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + + if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) { + perror("sched_setaffinity"); + exit(1); + } +} + +static void start_process_on(void *(*fn)(void *), void *arg, int cpu) +{ + int pid; + + pid = fork(); + if (pid == -1) { + perror("fork"); + exit(1); + } + + if (pid) + return; + + set_cpu(cpu); + + fn(arg); + + exit(0); +} + +static int cpu; +static int do_fork = 0; +static int do_vfork = 0; +static int do_exec = 0; +static char *exec_file; +static int exec_target = 0; +static unsigned long iterations; +static unsigned long iterations_prev; + +static void run_exec(void) +{ + char *const argv[] = { "./exec_target", NULL }; + + if (execve("./exec_target", argv, NULL) == -1) { + perror("execve"); + exit(1); + } +} + +static void bench_fork(void) +{ + while (1) { + pid_t pid = fork(); + if (pid == -1) { + perror("fork"); + exit(1); + } + if (pid == 0) { + if (do_exec) + run_exec(); + _exit(0); + } + pid = waitpid(pid, NULL, 0); + if (pid == -1) { + perror("waitpid"); + exit(1); + } + iterations++; + } +} + +static void bench_vfork(void) +{ + while (1) { + pid_t pid = vfork(); + if (pid == -1) { + perror("fork"); + exit(1); + } + if (pid == 0) { + if (do_exec) + run_exec(); + _exit(0); + } + pid = waitpid(pid, NULL, 0); + if (pid == -1) { + perror("waitpid"); + exit(1); + } + iterations++; + } +} + +static void *null_fn(void *arg) +{ + pthread_exit(NULL); +} + +static void bench_thread(void) +{ + pthread_t tid; + cpu_set_t cpuset; + pthread_attr_t attr; + int rc; + + rc = pthread_attr_init(&attr); + if (rc) { + errno = rc; + perror("pthread_attr_init"); + exit(1); + } + + if (cpu != -1) { + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + + rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); + if (rc) { + errno = rc; + perror("pthread_attr_setaffinity_np"); + exit(1); + } + } + + while (1) { + rc = pthread_create(&tid, &attr, null_fn, NULL); + if (rc) { + errno = rc; + perror("pthread_create"); + exit(1); + } + rc = pthread_join(tid, NULL); + if (rc) { + errno = rc; + perror("pthread_join"); + exit(1); + } + iterations++; + } +} + +static void sigalrm_handler(int junk) +{ + unsigned long i = iterations; + + printf("%ld\n", i - iterations_prev); + iterations_prev = i; + + if (--timeout == 0) + kill(0, SIGUSR1); + + alarm(1); +} + +static void sigusr1_handler(int junk) +{ + exit(0); +} + +static void *bench_proc(void *arg) +{ + signal(SIGALRM, sigalrm_handler); + alarm(1); + + if (do_fork) + bench_fork(); + else if (do_vfork) + bench_vfork(); + else + bench_thread(); + + return NULL; +} + +static struct option options[] = { + { "fork", no_argument, &do_fork, 1 }, + { "vfork", no_argument, &do_vfork, 1 }, + { "exec", no_argument, &do_exec, 1 }, + { "timeout", required_argument, 0, 's' }, + { "exec-target", no_argument, &exec_target, 1 }, + { NULL }, +}; + +static void usage(void) +{ + fprintf(stderr, "Usage: fork <options> CPU\n\n"); + fprintf(stderr, "\t\t--fork\tUse fork() (default threads)\n"); + fprintf(stderr, "\t\t--vfork\tUse vfork() (default threads)\n"); + fprintf(stderr, "\t\t--exec\tAlso exec() (default no exec)\n"); + fprintf(stderr, "\t\t--timeout=X\tDuration in seconds to run (default 30)\n"); + fprintf(stderr, "\t\t--exec-target\tInternal option for exec workload\n"); +} + +int main(int argc, char *argv[]) +{ + signed char c; + + while (1) { + int option_index = 0; + + c = getopt_long(argc, argv, "", options, &option_index); + + if (c == -1) + break; + + switch (c) { + case 0: + if (options[option_index].flag != 0) + break; + + usage(); + exit(1); + break; + + case 's': + timeout = atoi(optarg); + break; + + default: + usage(); + exit(1); + } + } + + if (do_fork && do_vfork) { + usage(); + exit(1); + } + if (do_exec && !do_fork && !do_vfork) { + usage(); + exit(1); + } + + if (do_exec) { + char *dirname = strdup(argv[0]); + int i; + i = strlen(dirname) - 1; + while (i) { + if (dirname[i] == '/') { + dirname[i] = '\0'; + if (chdir(dirname) == -1) { + perror("chdir"); + exit(1); + } + break; + } + i--; + } + } + + if (exec_target) { + exit(0); + } + + if (((argc - optind) != 1)) { + cpu = -1; + } else { + cpu = atoi(argv[optind++]); + } + + if (do_exec) + exec_file = argv[0]; + + set_cpu(cpu); + + printf("Using "); + if (do_fork) + printf("fork"); + else if (do_vfork) + printf("vfork"); + else + printf("clone"); + + if (do_exec) + printf(" + exec"); + + printf(" on cpu %d\n", cpu); + + /* Create a new process group so we can signal everyone for exit */ + setpgid(getpid(), getpid()); + + signal(SIGUSR1, sigusr1_handler); + + start_process_on(bench_proc, NULL, cpu); + + while (1) + sleep(3600); + + return 0; +} diff --git a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c index 8d084a2d6e74..7a0a462a2272 100644 --- a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c +++ b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c @@ -7,17 +7,34 @@ #include <stdlib.h> #include <sys/mman.h> #include <time.h> +#include <getopt.h> #include "utils.h" #define ITERATIONS 5000000 -#define MEMSIZE (128 * 1024 * 1024) +#define MEMSIZE (1UL << 27) +#define PAGE_SIZE (1UL << 16) +#define CHUNK_COUNT (MEMSIZE/PAGE_SIZE) + +static int pg_fault; +static int iterations = ITERATIONS; + +static struct option options[] = { + { "pgfault", no_argument, &pg_fault, 1 }, + { "iterations", required_argument, 0, 'i' }, + { 0, }, +}; + +static void usage(void) +{ + printf("mmap_bench <--pgfault> <--iterations count>\n"); +} int test_mmap(void) { struct timespec ts_start, ts_end; - unsigned long i = ITERATIONS; + unsigned long i = iterations; clock_gettime(CLOCK_MONOTONIC, &ts_start); @@ -25,6 +42,11 @@ int test_mmap(void) char *c = mmap(NULL, MEMSIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); FAIL_IF(c == MAP_FAILED); + if (pg_fault) { + int count; + for (count = 0; count < CHUNK_COUNT; count++) + c[count << 16] = 'c'; + } munmap(c, MEMSIZE); } @@ -35,7 +57,32 @@ int test_mmap(void) return 0; } -int main(void) +int main(int argc, char *argv[]) { + signed char c; + while (1) { + int option_index = 0; + + c = getopt_long(argc, argv, "", options, &option_index); + + if (c == -1) + break; + + switch (c) { + case 0: + if (options[option_index].flag != 0) + break; + + usage(); + exit(1); + break; + case 'i': + iterations = atoi(optarg); + break; + default: + usage(); + exit(1); + } + } return test_harness(test_mmap, "mmap_bench"); } diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index ac4a52e19e59..eedce3366f64 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -5,8 +5,8 @@ CFLAGS += -I$(CURDIR) CFLAGS += -D SELFTEST CFLAGS += -maltivec -# Use our CFLAGS for the implicit .S rule -ASFLAGS = $(CFLAGS) +# Use our CFLAGS for the implicit .S rule & set the asm machine type +ASFLAGS = $(CFLAGS) -Wa,-mpower4 TEST_GEN_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7 EXTRA_SOURCES := validate.c ../harness.c diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index e715a3f2fbf4..7d7c42ed6de9 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -1,4 +1,5 @@ hugetlb_vs_thp_test subpage_prot tempfile -prot_sao
\ No newline at end of file +prot_sao +segv_errors
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index bf315bcbe663..8ebbe96d80a8 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -2,7 +2,7 @@ noarg: $(MAKE) -C ../ -TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao +TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors TEST_GEN_FILES := tempfile include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/mm/segv_errors.c b/tools/testing/selftests/powerpc/mm/segv_errors.c new file mode 100644 index 000000000000..06ae76ee3ea1 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/segv_errors.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2017 John Sperbeck + * + * Test that an access to a mapped but inaccessible area causes a SEGV and + * reports si_code == SEGV_ACCERR. + */ + +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include <sys/mman.h> +#include <assert.h> +#include <ucontext.h> + +#include "utils.h" + +static bool faulted; +static int si_code; + +static void segv_handler(int n, siginfo_t *info, void *ctxt_v) +{ + ucontext_t *ctxt = (ucontext_t *)ctxt_v; + struct pt_regs *regs = ctxt->uc_mcontext.regs; + + faulted = true; + si_code = info->si_code; + regs->nip += 4; +} + +int test_segv_errors(void) +{ + struct sigaction act = { + .sa_sigaction = segv_handler, + .sa_flags = SA_SIGINFO, + }; + char c, *p = NULL; + + p = mmap(NULL, getpagesize(), 0, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + FAIL_IF(p == MAP_FAILED); + + FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0); + + faulted = false; + si_code = 0; + + /* + * We just need a compiler barrier, but mb() works and has the nice + * property of being easy to spot in the disassembly. + */ + mb(); + c = *p; + mb(); + + FAIL_IF(!faulted); + FAIL_IF(si_code != SEGV_ACCERR); + + faulted = false; + si_code = 0; + + mb(); + *p = c; + mb(); + + FAIL_IF(!faulted); + FAIL_IF(si_code != SEGV_ACCERR); + + return 0; +} + +int main(void) +{ + return test_harness(test_segv_errors, "segv_errors"); +} diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c index 35ade7406dcd..3ae77ba93208 100644 --- a/tools/testing/selftests/powerpc/mm/subpage_prot.c +++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c @@ -135,6 +135,16 @@ static int run_test(void *addr, unsigned long size) return 0; } +static int syscall_available(void) +{ + int rc; + + errno = 0; + rc = syscall(__NR_subpage_prot, 0, 0, 0); + + return rc == 0 || (errno != ENOENT && errno != ENOSYS); +} + int test_anon(void) { unsigned long align; @@ -145,6 +155,8 @@ int test_anon(void) void *mallocblock; unsigned long mallocsize; + SKIP_IF(!syscall_available()); + if (getpagesize() != 0x10000) { fprintf(stderr, "Kernel page size must be 64K!\n"); return 1; @@ -180,6 +192,8 @@ int test_file(void) off_t filesize; int fd; + SKIP_IF(!syscall_available()); + fd = open(file_name, O_RDWR); if (fd == -1) { perror("failed to open file"); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c index 0df3c23b7888..277dade1b382 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c @@ -79,8 +79,8 @@ trans: : [res] "=r" (result), [texasr] "=r" (texasr) : [fp_load] "r" (fp_load), [fp_load_ckpt] "r" (fp_load_ckpt), [sprn_texasr] "i" (SPRN_TEXASR) - : "memory", "r0", "r1", "r2", "r3", "r4", - "r8", "r9", "r10", "r11" + : "memory", "r0", "r1", "r3", "r4", + "r7", "r8", "r9", "r10", "r11" ); if (result) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c index 94e57cb89769..51427a2465f6 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c @@ -76,8 +76,7 @@ trans: : [tfhar] "=r" (tfhar), [res] "=r" (result), [texasr] "=r" (texasr), [cptr1] "=r" (cptr1) : [sprn_texasr] "i" (SPRN_TEXASR) - : "memory", "r0", "r1", "r2", "r3", "r4", - "r8", "r9", "r10", "r11", "r31" + : "memory", "r0", "r8", "r31" ); /* There are 2 32bit instructions before tbegin. */ diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c index b4081e2b22d5..17c23cabac3e 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c @@ -67,7 +67,7 @@ trans: : [res] "=r" (result), [texasr] "=r" (texasr) : [fp_load] "r" (fp_load), [fp_load_ckpt] "r" (fp_load_ckpt), [sprn_texasr] "i" (SPRN_TEXASR), [cptr1] "r" (&cptr[1]) - : "memory", "r0", "r1", "r2", "r3", "r4", + : "memory", "r0", "r1", "r3", "r4", "r7", "r8", "r9", "r10", "r11" ); diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 241a4a4ee0e4..bb90d4b79524 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -13,3 +13,4 @@ tm-signal-context-chk-vmx tm-signal-context-chk-vsx tm-vmx-unavail tm-unavailable +tm-trap diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 8ed6f8c57230..c0e45d2dde25 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -3,8 +3,8 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu tm-signal-context-chk-vmx tm-signal-context-chk-vsx TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ - tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable \ - $(SIGNAL_CONTEXT_CHK_TESTS) + tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \ + $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn include ../../lib.mk @@ -16,8 +16,9 @@ $(OUTPUT)/tm-syscall: tm-syscall-asm.S $(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include $(OUTPUT)/tm-tmspr: CFLAGS += -pthread $(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64 -$(OUTPUT)/tm-resched-dscr: ../pmu/lib.o +$(OUTPUT)/tm-resched-dscr: ../pmu/lib.c $(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx +$(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64 SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS)) $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S diff --git a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c new file mode 100644 index 000000000000..85d63449243b --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2015, Laurent Dufour, IBM Corp. + * + * Test the kernel's signal returning code to check reclaim is done if the + * sigreturn() is called while in a transaction (suspended since active is + * already dropped trough the system call path). + * + * The kernel must discard the transaction when entering sigreturn, since + * restoring the potential TM SPRS from the signal frame is requiring to not be + * in a transaction. + */ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "tm.h" +#include "utils.h" + + +void handler(int sig) +{ + uint64_t ret; + + asm __volatile__( + "li 3,1 ;" + "tbegin. ;" + "beq 1f ;" + "li 3,0 ;" + "tsuspend. ;" + "1: ;" + "std%X[ret] 3, %[ret] ;" + : [ret] "=m"(ret) + : + : "memory", "3", "cr0"); + + if (ret) + exit(1); + + /* + * We return from the signal handle while in a suspended transaction + */ +} + + +int tm_sigreturn(void) +{ + struct sigaction sa; + uint64_t ret = 0; + + SKIP_IF(!have_htm()); + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = handler; + sigemptyset(&sa.sa_mask); + + if (sigaction(SIGSEGV, &sa, NULL)) + exit(1); + + asm __volatile__( + "tbegin. ;" + "beq 1f ;" + "li 3,0 ;" + "std 3,0(3) ;" /* trigger SEGV */ + "li 3,1 ;" + "std%X[ret] 3,%[ret] ;" + "tend. ;" + "b 2f ;" + "1: ;" + "li 3,2 ;" + "std%X[ret] 3,%[ret] ;" + "2: ;" + : [ret] "=m"(ret) + : + : "memory", "3", "cr0"); + + if (ret != 2) + exit(1); + + exit(0); +} + +int main(void) +{ + return test_harness(tm_sigreturn, "tm_sigreturn"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c new file mode 100644 index 000000000000..179d592f0073 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-trap.c @@ -0,0 +1,331 @@ +/* + * Copyright 2017, Gustavo Romero, IBM Corp. + * Licensed under GPLv2. + * + * Check if thread endianness is flipped inadvertently to BE on trap + * caught in TM whilst MSR.FP and MSR.VEC are zero (i.e. just after + * load_fp and load_vec overflowed). + * + * The issue can be checked on LE machines simply by zeroing load_fp + * and load_vec and then causing a trap in TM. Since the endianness + * changes to BE on return from the signal handler, 'nop' is + * thread as an illegal instruction in following sequence: + * tbegin. + * beq 1f + * trap + * tend. + * 1: nop + * + * However, although the issue is also present on BE machines, it's a + * bit trickier to check it on BE machines because MSR.LE bit is set + * to zero which determines a BE endianness that is the native + * endianness on BE machines, so nothing notably critical happens, + * i.e. no illegal instruction is observed immediately after returning + * from the signal handler (as it happens on LE machines). Thus to test + * it on BE machines LE endianness is forced after a first trap and then + * the endianness is verified on subsequent traps to determine if the + * endianness "flipped back" to the native endianness (BE). + */ + +#define _GNU_SOURCE +#include <error.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <htmintrin.h> +#include <inttypes.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include <stdbool.h> + +#include "tm.h" +#include "utils.h" + +#define pr_error(error_code, format, ...) \ + error_at_line(1, error_code, __FILE__, __LINE__, format, ##__VA_ARGS__) + +#define MSR_LE 1UL +#define LE 1UL + +pthread_t t0_ping; +pthread_t t1_pong; + +int exit_from_pong; + +int trap_event; +int le; + +bool success; + +void trap_signal_handler(int signo, siginfo_t *si, void *uc) +{ + ucontext_t *ucp = uc; + uint64_t thread_endianness; + + /* Get thread endianness: extract bit LE from MSR */ + thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR]; + + /*** + * Little-Endian Machine + */ + + if (le) { + /* First trap event */ + if (trap_event == 0) { + /* Do nothing. Since it is returning from this trap + * event that endianness is flipped by the bug, so just + * let the process return from the signal handler and + * check on the second trap event if endianness is + * flipped or not. + */ + } + /* Second trap event */ + else if (trap_event == 1) { + /* + * Since trap was caught in TM on first trap event, if + * endianness was still LE (not flipped inadvertently) + * after returning from the signal handler instruction + * (1) is executed (basically a 'nop'), as it's located + * at address of tbegin. +4 (rollback addr). As (1) on + * LE endianness does in effect nothing, instruction (2) + * is then executed again as 'trap', generating a second + * trap event (note that in that case 'trap' is caught + * not in transacional mode). On te other hand, if after + * the return from the signal handler the endianness in- + * advertently flipped, instruction (1) is tread as a + * branch instruction, i.e. b .+8, hence instruction (3) + * and (4) are executed (tbegin.; trap;) and we get sim- + * ilaly on the trap signal handler, but now in TM mode. + * Either way, it's now possible to check the MSR LE bit + * once in the trap handler to verify if endianness was + * flipped or not after the return from the second trap + * event. If endianness is flipped, the bug is present. + * Finally, getting a trap in TM mode or not is just + * worth noting because it affects the math to determine + * the offset added to the NIP on return: the NIP for a + * trap caught in TM is the rollback address, i.e. the + * next instruction after 'tbegin.', whilst the NIP for + * a trap caught in non-transactional mode is the very + * same address of the 'trap' instruction that generated + * the trap event. + */ + + if (thread_endianness == LE) { + /* Go to 'success', i.e. instruction (6) */ + ucp->uc_mcontext.gp_regs[PT_NIP] += 16; + } else { + /* + * Thread endianness is BE, so it flipped + * inadvertently. Thus we flip back to LE and + * set NIP to go to 'failure', instruction (5). + */ + ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL; + ucp->uc_mcontext.gp_regs[PT_NIP] += 4; + } + } + } + + /*** + * Big-Endian Machine + */ + + else { + /* First trap event */ + if (trap_event == 0) { + /* + * Force thread endianness to be LE. Instructions (1), + * (3), and (4) will be executed, generating a second + * trap in TM mode. + */ + ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL; + } + /* Second trap event */ + else if (trap_event == 1) { + /* + * Do nothing. If bug is present on return from this + * second trap event endianness will flip back "automat- + * ically" to BE, otherwise thread endianness will + * continue to be LE, just as it was set above. + */ + } + /* A third trap event */ + else { + /* + * Once here it means that after returning from the sec- + * ond trap event instruction (4) (trap) was executed + * as LE, generating a third trap event. In that case + * endianness is still LE as set on return from the + * first trap event, hence no bug. Otherwise, bug + * flipped back to BE on return from the second trap + * event and instruction (4) was executed as 'tdi' (so + * basically a 'nop') and branch to 'failure' in + * instruction (5) was taken to indicate failure and we + * never get here. + */ + + /* + * Flip back to BE and go to instruction (6), i.e. go to + * 'success'. + */ + ucp->uc_mcontext.gp_regs[PT_MSR] &= ~1UL; + ucp->uc_mcontext.gp_regs[PT_NIP] += 8; + } + } + + trap_event++; +} + +void usr1_signal_handler(int signo, siginfo_t *si, void *not_used) +{ + /* Got a USR1 signal from ping(), so just tell pong() to exit */ + exit_from_pong = 1; +} + +void *ping(void *not_used) +{ + uint64_t i; + + trap_event = 0; + + /* + * Wait an amount of context switches so load_fp and load_vec overflows + * and MSR_[FP|VEC|V] is 0. + */ + for (i = 0; i < 1024*1024*512; i++) + ; + + asm goto( + /* + * [NA] means "Native Endianness", i.e. it tells how a + * instruction is executed on machine's native endianness (in + * other words, native endianness matches kernel endianness). + * [OP] means "Opposite Endianness", i.e. on a BE machine, it + * tells how a instruction is executed as a LE instruction; con- + * versely, on a LE machine, it tells how a instruction is + * executed as a BE instruction. When [NA] is omitted, it means + * that the native interpretation of a given instruction is not + * relevant for the test. Likewise when [OP] is omitted. + */ + + " tbegin. ;" /* (0) tbegin. [NA] */ + " tdi 0, 0, 0x48;" /* (1) nop [NA]; b (3) [OP] */ + " trap ;" /* (2) trap [NA] */ + ".long 0x1D05007C;" /* (3) tbegin. [OP] */ + ".long 0x0800E07F;" /* (4) trap [OP]; nop [NA] */ + " b %l[failure] ;" /* (5) b [NA]; MSR.LE flipped (bug) */ + " b %l[success] ;" /* (6) b [NA]; MSR.LE did not flip (ok)*/ + + : : : : failure, success); + +failure: + success = false; + goto exit_from_ping; + +success: + success = true; + +exit_from_ping: + /* Tell pong() to exit before leaving */ + pthread_kill(t1_pong, SIGUSR1); + return NULL; +} + +void *pong(void *not_used) +{ + while (!exit_from_pong) + /* + * Induce context switches on ping() thread + * until ping() finishes its job and signs + * to exit from this loop. + */ + sched_yield(); + + return NULL; +} + +int tm_trap_test(void) +{ + uint16_t k = 1; + + int rc; + + pthread_attr_t attr; + cpu_set_t cpuset; + + struct sigaction trap_sa; + + SKIP_IF(!have_htm()); + + trap_sa.sa_flags = SA_SIGINFO; + trap_sa.sa_sigaction = trap_signal_handler; + sigaction(SIGTRAP, &trap_sa, NULL); + + struct sigaction usr1_sa; + + usr1_sa.sa_flags = SA_SIGINFO; + usr1_sa.sa_sigaction = usr1_signal_handler; + sigaction(SIGUSR1, &usr1_sa, NULL); + + /* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */ + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + + /* Init pthread attribute */ + rc = pthread_attr_init(&attr); + if (rc) + pr_error(rc, "pthread_attr_init()"); + + /* + * Bind thread ping() and pong() both to CPU 0 so they ping-pong and + * speed up context switches on ping() thread, speeding up the load_fp + * and load_vec overflow. + */ + rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); + if (rc) + pr_error(rc, "pthread_attr_setaffinity()"); + + /* Figure out the machine endianness */ + le = (int) *(uint8_t *)&k; + + printf("%s machine detected. Checking if endianness flips %s", + le ? "Little-Endian" : "Big-Endian", + "inadvertently on trap in TM... "); + + rc = fflush(0); + if (rc) + pr_error(rc, "fflush()"); + + /* Launch ping() */ + rc = pthread_create(&t0_ping, &attr, ping, NULL); + if (rc) + pr_error(rc, "pthread_create()"); + + exit_from_pong = 0; + + /* Launch pong() */ + rc = pthread_create(&t1_pong, &attr, pong, NULL); + if (rc) + pr_error(rc, "pthread_create()"); + + rc = pthread_join(t0_ping, NULL); + if (rc) + pr_error(rc, "pthread_join()"); + + rc = pthread_join(t1_pong, NULL); + if (rc) + pr_error(rc, "pthread_join()"); + + if (success) { + printf("no.\n"); /* no, endianness did not flip inadvertently */ + return EXIT_SUCCESS; + } + + printf("yes!\n"); /* yes, endianness did flip inadvertently */ + return EXIT_FAILURE; +} + +int main(int argc, char **argv) +{ + return test_harness(tm_trap_test, "tm_trap_test"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c index 96c37f84ce54..156c8e750259 100644 --- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c +++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c @@ -15,6 +15,7 @@ */ #define _GNU_SOURCE +#include <error.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -33,6 +34,11 @@ #define VSX_UNA_EXCEPTION 2 #define NUM_EXCEPTIONS 3 +#define err_at_line(status, errnum, format, ...) \ + error_at_line(status, errnum, __FILE__, __LINE__, format ##__VA_ARGS__) + +#define pr_warn(code, format, ...) err_at_line(0, code, format, ##__VA_ARGS__) +#define pr_err(code, format, ...) err_at_line(1, code, format, ##__VA_ARGS__) struct Flags { int touch_fp; @@ -74,7 +80,7 @@ bool is_failure(uint64_t condition_reg) return ((condition_reg >> 28) & 0xa) == 0xa; } -void *ping(void *input) +void *tm_una_ping(void *input) { /* @@ -274,7 +280,7 @@ void *ping(void *input) } /* Thread to force context switch */ -void *pong(void *not_used) +void *tm_una_pong(void *not_used) { /* Wait thread get its name "pong". */ if (DEBUG) @@ -303,10 +309,19 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr) * checking if the failure cause is the one we expect. */ do { - /* Bind 'ping' to CPU 0, as specified in 'attr'. */ - pthread_create(&t0, attr, ping, (void *) &flags); - pthread_setname_np(t0, "ping"); - pthread_join(t0, &ret_value); + int rc; + + /* Bind to CPU 0, as specified in 'attr'. */ + rc = pthread_create(&t0, attr, tm_una_ping, (void *) &flags); + if (rc) + pr_err(rc, "pthread_create()"); + rc = pthread_setname_np(t0, "tm_una_ping"); + if (rc) + pr_warn(rc, "pthread_setname_np"); + rc = pthread_join(t0, &ret_value); + if (rc) + pr_err(rc, "pthread_join"); + retries--; } while (ret_value != NULL && retries); @@ -318,25 +333,37 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr) } } -int main(int argc, char **argv) +int tm_unavailable_test(void) { - int exception; /* FP = 0, VEC = 1, VSX = 2 */ + int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */ pthread_t t1; pthread_attr_t attr; cpu_set_t cpuset; + SKIP_IF(!have_htm()); + /* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */ CPU_ZERO(&cpuset); CPU_SET(0, &cpuset); /* Init pthread attribute. */ - pthread_attr_init(&attr); + rc = pthread_attr_init(&attr); + if (rc) + pr_err(rc, "pthread_attr_init()"); /* Set CPU 0 mask into the pthread attribute. */ - pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); + rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); + if (rc) + pr_err(rc, "pthread_attr_setaffinity_np()"); + + rc = pthread_create(&t1, &attr /* Bind to CPU 0 */, tm_una_pong, NULL); + if (rc) + pr_err(rc, "pthread_create()"); - pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL); - pthread_setname_np(t1, "pong"); /* Name it for systemtap convenience */ + /* Name it for systemtap convenience */ + rc = pthread_setname_np(t1, "tm_una_pong"); + if (rc) + pr_warn(rc, "pthread_create()"); flags.result = 0; @@ -369,3 +396,9 @@ int main(int argc, char **argv) exit(0); } } + +int main(int argc, char **argv) +{ + test_harness_set_timeout(220); + return test_harness(tm_unavailable_test, "tm_unavailable_test"); +} diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore new file mode 100644 index 000000000000..6c16f77c722c --- /dev/null +++ b/tools/testing/selftests/proc/.gitignore @@ -0,0 +1,8 @@ +/proc-loadavg-001 +/proc-self-map-files-001 +/proc-self-map-files-002 +/proc-self-syscall +/proc-self-wchan +/proc-uptime-001 +/proc-uptime-002 +/read diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile new file mode 100644 index 000000000000..dbb87e56264c --- /dev/null +++ b/tools/testing/selftests/proc/Makefile @@ -0,0 +1,13 @@ +CFLAGS += -Wall -O2 + +TEST_GEN_PROGS := +TEST_GEN_PROGS += proc-loadavg-001 +TEST_GEN_PROGS += proc-self-map-files-001 +TEST_GEN_PROGS += proc-self-map-files-002 +TEST_GEN_PROGS += proc-self-syscall +TEST_GEN_PROGS += proc-self-wchan +TEST_GEN_PROGS += proc-uptime-001 +TEST_GEN_PROGS += proc-uptime-002 +TEST_GEN_PROGS += read + +include ../lib.mk diff --git a/tools/testing/selftests/proc/config b/tools/testing/selftests/proc/config new file mode 100644 index 000000000000..68fbd2b35884 --- /dev/null +++ b/tools/testing/selftests/proc/config @@ -0,0 +1 @@ +CONFIG_PROC_FS=y diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c new file mode 100644 index 000000000000..fcff7047000d --- /dev/null +++ b/tools/testing/selftests/proc/proc-loadavg-001.c @@ -0,0 +1,83 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test that /proc/loadavg correctly reports last pid in pid namespace. */ +#define _GNU_SOURCE +#include <errno.h> +#include <sched.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/wait.h> + +int main(void) +{ + pid_t pid; + int wstatus; + + if (unshare(CLONE_NEWPID) == -1) { + if (errno == ENOSYS || errno == EPERM) + return 2; + return 1; + } + + pid = fork(); + if (pid == -1) + return 1; + if (pid == 0) { + char buf[128], *p; + int fd; + ssize_t rv; + + fd = open("/proc/loadavg" , O_RDONLY); + if (fd == -1) + return 1; + rv = read(fd, buf, sizeof(buf)); + if (rv < 3) + return 1; + p = buf + rv; + + /* pid 1 */ + if (!(p[-3] == ' ' && p[-2] == '1' && p[-1] == '\n')) + return 1; + + pid = fork(); + if (pid == -1) + return 1; + if (pid == 0) + return 0; + if (waitpid(pid, NULL, 0) == -1) + return 1; + + lseek(fd, 0, SEEK_SET); + rv = read(fd, buf, sizeof(buf)); + if (rv < 3) + return 1; + p = buf + rv; + + /* pid 2 */ + if (!(p[-3] == ' ' && p[-2] == '2' && p[-1] == '\n')) + return 1; + + return 0; + } + + if (waitpid(pid, &wstatus, 0) == -1) + return 1; + if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) + return 0; + return 1; +} diff --git a/tools/testing/selftests/proc/proc-self-map-files-001.c b/tools/testing/selftests/proc/proc-self-map-files-001.c new file mode 100644 index 000000000000..4209c64283d6 --- /dev/null +++ b/tools/testing/selftests/proc/proc-self-map-files-001.c @@ -0,0 +1,82 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test readlink /proc/self/map_files/... */ +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/mman.h> +#include <stdlib.h> + +static void pass(const char *fmt, unsigned long a, unsigned long b) +{ + char name[64]; + char buf[64]; + + snprintf(name, sizeof(name), fmt, a, b); + if (readlink(name, buf, sizeof(buf)) == -1) + exit(1); +} + +static void fail(const char *fmt, unsigned long a, unsigned long b) +{ + char name[64]; + char buf[64]; + + snprintf(name, sizeof(name), fmt, a, b); + if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT) + return; + exit(1); +} + +int main(void) +{ + const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE); + void *p; + int fd; + unsigned long a, b; + + fd = open("/dev/zero", O_RDONLY); + if (fd == -1) + return 1; + + p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE, fd, 0); + if (p == MAP_FAILED) + return 1; + + a = (unsigned long)p; + b = (unsigned long)p + PAGE_SIZE; + + pass("/proc/self/map_files/%lx-%lx", a, b); + fail("/proc/self/map_files/ %lx-%lx", a, b); + fail("/proc/self/map_files/%lx -%lx", a, b); + fail("/proc/self/map_files/%lx- %lx", a, b); + fail("/proc/self/map_files/%lx-%lx ", a, b); + fail("/proc/self/map_files/0%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-0%lx", a, b); + if (sizeof(long) == 4) { + fail("/proc/self/map_files/100000000%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-100000000%lx", a, b); + } else if (sizeof(long) == 8) { + fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b); + } else + return 1; + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c new file mode 100644 index 000000000000..6f1f4a6e1ecb --- /dev/null +++ b/tools/testing/selftests/proc/proc-self-map-files-002.c @@ -0,0 +1,85 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test readlink /proc/self/map_files/... with address 0. */ +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/mman.h> +#include <stdlib.h> + +static void pass(const char *fmt, unsigned long a, unsigned long b) +{ + char name[64]; + char buf[64]; + + snprintf(name, sizeof(name), fmt, a, b); + if (readlink(name, buf, sizeof(buf)) == -1) + exit(1); +} + +static void fail(const char *fmt, unsigned long a, unsigned long b) +{ + char name[64]; + char buf[64]; + + snprintf(name, sizeof(name), fmt, a, b); + if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT) + return; + exit(1); +} + +int main(void) +{ + const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE); + void *p; + int fd; + unsigned long a, b; + + fd = open("/dev/zero", O_RDONLY); + if (fd == -1) + return 1; + + p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0); + if (p == MAP_FAILED) { + if (errno == EPERM) + return 2; + return 1; + } + + a = (unsigned long)p; + b = (unsigned long)p + PAGE_SIZE; + + pass("/proc/self/map_files/%lx-%lx", a, b); + fail("/proc/self/map_files/ %lx-%lx", a, b); + fail("/proc/self/map_files/%lx -%lx", a, b); + fail("/proc/self/map_files/%lx- %lx", a, b); + fail("/proc/self/map_files/%lx-%lx ", a, b); + fail("/proc/self/map_files/0%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-0%lx", a, b); + if (sizeof(long) == 4) { + fail("/proc/self/map_files/100000000%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-100000000%lx", a, b); + } else if (sizeof(long) == 8) { + fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b); + } else + return 1; + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c new file mode 100644 index 000000000000..5ab5f4810e43 --- /dev/null +++ b/tools/testing/selftests/proc/proc-self-syscall.c @@ -0,0 +1,60 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> +#include <stdio.h> + +static inline ssize_t sys_read(int fd, void *buf, size_t len) +{ + return syscall(SYS_read, fd, buf, len); +} + +int main(void) +{ + char buf1[64]; + char buf2[64]; + int fd; + ssize_t rv; + + fd = open("/proc/self/syscall", O_RDONLY); + if (fd == -1) { + if (errno == ENOENT) + return 2; + return 1; + } + + /* Do direct system call as libc can wrap anything. */ + snprintf(buf1, sizeof(buf1), "%ld 0x%lx 0x%lx 0x%lx", + (long)SYS_read, (long)fd, (long)buf2, (long)sizeof(buf2)); + + memset(buf2, 0, sizeof(buf2)); + rv = sys_read(fd, buf2, sizeof(buf2)); + if (rv < 0) + return 1; + if (rv < strlen(buf1)) + return 1; + if (strncmp(buf1, buf2, strlen(buf1)) != 0) + return 1; + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-self-wchan.c b/tools/testing/selftests/proc/proc-self-wchan.c new file mode 100644 index 000000000000..a38b2fbaa7ad --- /dev/null +++ b/tools/testing/selftests/proc/proc-self-wchan.c @@ -0,0 +1,40 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> + +int main(void) +{ + char buf[64]; + int fd; + + fd = open("/proc/self/wchan", O_RDONLY); + if (fd == -1) { + if (errno == ENOENT) + return 2; + return 1; + } + + buf[0] = '\0'; + if (read(fd, buf, sizeof(buf)) != 1) + return 1; + if (buf[0] != '0') + return 1; + return 0; +} diff --git a/tools/testing/selftests/proc/proc-uptime-001.c b/tools/testing/selftests/proc/proc-uptime-001.c new file mode 100644 index 000000000000..781f7a50fc3f --- /dev/null +++ b/tools/testing/selftests/proc/proc-uptime-001.c @@ -0,0 +1,45 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +// Test that values in /proc/uptime increment monotonically. +#undef NDEBUG +#include <assert.h> +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "proc-uptime.h" + +int main(void) +{ + uint64_t start, u0, u1, i0, i1; + int fd; + + fd = open("/proc/uptime", O_RDONLY); + assert(fd >= 0); + + proc_uptime(fd, &u0, &i0); + start = u0; + do { + proc_uptime(fd, &u1, &i1); + assert(u1 >= u0); + assert(i1 >= i0); + u0 = u1; + i0 = i1; + } while (u1 - start < 100); + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c new file mode 100644 index 000000000000..30e2b7849089 --- /dev/null +++ b/tools/testing/selftests/proc/proc-uptime-002.c @@ -0,0 +1,79 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +// Test that values in /proc/uptime increment monotonically +// while shifting across CPUs. +#define _GNU_SOURCE +#undef NDEBUG +#include <assert.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <stdlib.h> +#include <string.h> + +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "proc-uptime.h" + +static inline int sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long *m) +{ + return syscall(SYS_sched_getaffinity, pid, len, m); +} + +static inline int sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long *m) +{ + return syscall(SYS_sched_setaffinity, pid, len, m); +} + +int main(void) +{ + unsigned int len; + unsigned long *m; + unsigned int cpu; + uint64_t u0, u1, i0, i1; + int fd; + + /* find out "nr_cpu_ids" */ + m = NULL; + len = 0; + do { + len += sizeof(unsigned long); + free(m); + m = malloc(len); + } while (sys_sched_getaffinity(0, len, m) == -EINVAL); + + fd = open("/proc/uptime", O_RDONLY); + assert(fd >= 0); + + proc_uptime(fd, &u0, &i0); + for (cpu = 0; cpu < len * 8; cpu++) { + memset(m, 0, len); + m[cpu / (8 * sizeof(unsigned long))] |= 1UL << (cpu % (8 * sizeof(unsigned long))); + + /* CPU might not exist, ignore error */ + sys_sched_setaffinity(0, len, m); + + proc_uptime(fd, &u1, &i1); + assert(u1 >= u0); + assert(i1 >= i0); + u0 = u1; + i0 = i1; + } + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h new file mode 100644 index 000000000000..0e464b50e9d9 --- /dev/null +++ b/tools/testing/selftests/proc/proc-uptime.h @@ -0,0 +1,74 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> + +static unsigned long long xstrtoull(const char *p, char **end) +{ + if (*p == '0') { + *end = (char *)p + 1; + return 0; + } else if ('1' <= *p && *p <= '9') { + unsigned long long val; + + errno = 0; + val = strtoull(p, end, 10); + assert(errno == 0); + return val; + } else + assert(0); +} + +static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle) +{ + uint64_t val1, val2; + char buf[64], *p; + ssize_t rv; + + /* save "p < end" checks */ + memset(buf, 0, sizeof(buf)); + rv = pread(fd, buf, sizeof(buf), 0); + assert(0 <= rv && rv <= sizeof(buf)); + buf[sizeof(buf) - 1] = '\0'; + + p = buf; + + val1 = xstrtoull(p, &p); + assert(p[0] == '.'); + assert('0' <= p[1] && p[1] <= '9'); + assert('0' <= p[2] && p[2] <= '9'); + assert(p[3] == ' '); + + val2 = (p[1] - '0') * 10 + p[2] - '0'; + *uptime = val1 * 100 + val2; + + p += 4; + + val1 = xstrtoull(p, &p); + assert(p[0] == '.'); + assert('0' <= p[1] && p[1] <= '9'); + assert('0' <= p[2] && p[2] <= '9'); + assert(p[3] == '\n'); + + val2 = (p[1] - '0') * 10 + p[2] - '0'; + *idle = val1 * 100 + val2; + + assert(p + 4 == buf + rv); +} diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c new file mode 100644 index 000000000000..1e73c2232097 --- /dev/null +++ b/tools/testing/selftests/proc/read.c @@ -0,0 +1,147 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +// Test +// 1) read of every file in /proc +// 2) readlink of every symlink in /proc +// 3) recursively (1) + (2) for every directory in /proc +// 4) write to /proc/*/clear_refs and /proc/*/task/*/clear_refs +// 5) write to /proc/sysrq-trigger +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <sys/types.h> +#include <dirent.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +static inline bool streq(const char *s1, const char *s2) +{ + return strcmp(s1, s2) == 0; +} + +static struct dirent *xreaddir(DIR *d) +{ + struct dirent *de; + + errno = 0; + de = readdir(d); + if (!de && errno != 0) { + exit(1); + } + return de; +} + +static void f_reg(DIR *d, const char *filename) +{ + char buf[4096]; + int fd; + ssize_t rv; + + /* read from /proc/kmsg can block */ + fd = openat(dirfd(d), filename, O_RDONLY|O_NONBLOCK); + if (fd == -1) + return; + rv = read(fd, buf, sizeof(buf)); + assert((0 <= rv && rv <= sizeof(buf)) || rv == -1); + close(fd); +} + +static void f_reg_write(DIR *d, const char *filename, const char *buf, size_t len) +{ + int fd; + ssize_t rv; + + fd = openat(dirfd(d), filename, O_WRONLY); + if (fd == -1) + return; + rv = write(fd, buf, len); + assert((0 <= rv && rv <= len) || rv == -1); + close(fd); +} + +static void f_lnk(DIR *d, const char *filename) +{ + char buf[4096]; + ssize_t rv; + + rv = readlinkat(dirfd(d), filename, buf, sizeof(buf)); + assert((0 <= rv && rv <= sizeof(buf)) || rv == -1); +} + +static void f(DIR *d, unsigned int level) +{ + struct dirent *de; + + de = xreaddir(d); + assert(de->d_type == DT_DIR); + assert(streq(de->d_name, ".")); + + de = xreaddir(d); + assert(de->d_type == DT_DIR); + assert(streq(de->d_name, "..")); + + while ((de = xreaddir(d))) { + assert(!streq(de->d_name, ".")); + assert(!streq(de->d_name, "..")); + + switch (de->d_type) { + DIR *dd; + int fd; + + case DT_REG: + if (level == 0 && streq(de->d_name, "sysrq-trigger")) { + f_reg_write(d, de->d_name, "h", 1); + } else if (level == 1 && streq(de->d_name, "clear_refs")) { + f_reg_write(d, de->d_name, "1", 1); + } else if (level == 3 && streq(de->d_name, "clear_refs")) { + f_reg_write(d, de->d_name, "1", 1); + } else { + f_reg(d, de->d_name); + } + break; + case DT_DIR: + fd = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY); + if (fd == -1) + continue; + dd = fdopendir(fd); + if (!dd) + continue; + f(dd, level + 1); + closedir(dd); + break; + case DT_LNK: + f_lnk(d, de->d_name); + break; + default: + assert(0); + } + } +} + +int main(void) +{ + DIR *d; + + d = opendir("/proc"); + if (!d) + return 2; + f(d, 0); + return 0; +} diff --git a/tools/testing/selftests/pstore/config b/tools/testing/selftests/pstore/config index 6a8e5a9bfc10..d148f9f89fb6 100644 --- a/tools/testing/selftests/pstore/config +++ b/tools/testing/selftests/pstore/config @@ -2,3 +2,4 @@ CONFIG_MISC_FILESYSTEMS=y CONFIG_PSTORE=y CONFIG_PSTORE_PMSG=y CONFIG_PSTORE_CONSOLE=y +CONFIG_PSTORE_RAM=m diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index 5d2eae16f7ee..a5d8f0ab0da0 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -60,9 +60,7 @@ static int clock_adjtime(clockid_t id, struct timex *tx) static clockid_t get_clockid(int fd) { #define CLOCKFD 3 -#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD) - - return FD_TO_CLOCKID(fd); + return (((unsigned int) ~fd) << 3) | CLOCKFD; } static void handle_alarm(int s) diff --git a/tools/testing/selftests/rcutorture/bin/config2frag.sh b/tools/testing/selftests/rcutorture/bin/config2frag.sh deleted file mode 100755 index 56f51ae13d73..000000000000 --- a/tools/testing/selftests/rcutorture/bin/config2frag.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -# Usage: config2frag.sh < .config > configfrag -# -# Converts the "# CONFIG_XXX is not set" to "CONFIG_XXX=n" so that the -# resulting file becomes a legitimate Kconfig fragment. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# Copyright (C) IBM Corporation, 2013 -# -# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> - -LANG=C sed -e 's/^# CONFIG_\([a-zA-Z0-9_]*\) is not set$/CONFIG_\1=n/' diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh index 51f66a7ce876..c15f270e121d 100755 --- a/tools/testing/selftests/rcutorture/bin/configinit.sh +++ b/tools/testing/selftests/rcutorture/bin/configinit.sh @@ -51,7 +51,7 @@ then mkdir $builddir fi else - echo Bad build directory: \"$builddir\" + echo Bad build directory: \"$buildloc\" exit 2 fi fi diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh index 07a13779eece..65f6655026f0 100644 --- a/tools/testing/selftests/rcutorture/bin/functions.sh +++ b/tools/testing/selftests/rcutorture/bin/functions.sh @@ -136,6 +136,9 @@ identify_boot_image () { qemu-system-x86_64|qemu-system-i386) echo arch/x86/boot/bzImage ;; + qemu-system-aarch64) + echo arch/arm64/boot/Image + ;; *) echo vmlinux ;; @@ -158,6 +161,9 @@ identify_qemu () { elif echo $u | grep -q "Intel 80386" then echo qemu-system-i386 + elif echo $u | grep -q aarch64 + then + echo qemu-system-aarch64 elif uname -a | grep -q ppc64 then echo qemu-system-ppc64 @@ -176,16 +182,20 @@ identify_qemu () { # Output arguments for the qemu "-append" string based on CPU type # and the TORTURE_QEMU_INTERACTIVE environment variable. identify_qemu_append () { + local console=ttyS0 case "$1" in qemu-system-x86_64|qemu-system-i386) echo noapic selinux=0 initcall_debug debug ;; + qemu-system-aarch64) + console=ttyAMA0 + ;; esac if test -n "$TORTURE_QEMU_INTERACTIVE" then echo root=/dev/sda else - echo console=ttyS0 + echo console=$console fi } @@ -197,6 +207,9 @@ identify_qemu_args () { case "$1" in qemu-system-x86_64|qemu-system-i386) ;; + qemu-system-aarch64) + echo -machine virt,gic-version=host -cpu host + ;; qemu-system-ppc64) echo -enable-kvm -M pseries -nodefaults echo -device spapr-vscsi @@ -254,7 +267,7 @@ specify_qemu_cpus () { echo $2 else case "$1" in - qemu-system-x86_64|qemu-system-i386) + qemu-system-x86_64|qemu-system-i386|qemu-system-aarch64) echo $2 -smp $3 ;; qemu-system-ppc64) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh index fb66d0173638..34d126734cde 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh @@ -29,11 +29,6 @@ then exit 1 fi builddir=${2} -if test -z "$builddir" -o ! -d "$builddir" -o ! -w "$builddir" -then - echo "kvm-build.sh :$builddir: Not a writable directory, cannot build into it" - exit 1 -fi T=${TMPDIR-/tmp}/test-linux.sh.$$ trap 'rm -rf $T' 0 diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh index 43f764098e50..2de92f43ee8c 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh @@ -23,7 +23,7 @@ # Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> i="$1" -if test -d $i +if test -d "$i" -a -r "$i" then : else diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh index 559e01ac86be..c2e1bb6d0cba 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh @@ -23,14 +23,14 @@ # Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> i="$1" -if test -d $i +if test -d "$i" -a -r "$i" then : else echo Unreadable results directory: $i exit 1 fi -. tools/testing/selftests/rcutorture/bin/functions.sh +. functions.sh configfile=`echo $i | sed -e 's/^.*\///'` ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'` diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh index f79b0e9e84fc..8948f7926b21 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh @@ -26,7 +26,7 @@ # Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> i="$1" -. tools/testing/selftests/rcutorture/bin/functions.sh +. functions.sh if test "`grep -c 'rcu_exp_grace_period.*start' < $i/console.log`" -lt 100 then @@ -39,30 +39,31 @@ sed -e 's/us : / : /' | tr -d '\015' | awk ' $8 == "start" { - if (starttask != "") + if (startseq != "") nlost++; starttask = $1; starttime = $3; startseq = $7; + seqtask[startseq] = starttask; } $8 == "end" { - if (starttask == $1 && startseq == $7) { + if (startseq == $7) { curgpdur = $3 - starttime; gptimes[++n] = curgpdur; gptaskcnt[starttask]++; sum += curgpdur; if (curgpdur > 1000) print "Long GP " starttime "us to " $3 "us (" curgpdur "us)"; - starttask = ""; + startseq = ""; } else { # Lost a message or some such, reset. - starttask = ""; + startseq = ""; nlost++; } } -$8 == "done" { +$8 == "done" && seqtask[$7] != $1 { piggybackcnt[$1]++; } diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh index 8f3121afc716..ccebf772fa1e 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh @@ -23,7 +23,7 @@ # Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> i="$1" -if test -d $i +if test -d "$i" -a -r "$i" then : else @@ -31,7 +31,7 @@ else exit 1 fi PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH -. tools/testing/selftests/rcutorture/bin/functions.sh +. functions.sh if kvm-recheck-rcuperf-ftrace.sh $i then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index f659346d3358..f7e988f369dd 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -25,7 +25,7 @@ # Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH -. tools/testing/selftests/rcutorture/bin/functions.sh +. functions.sh for rd in "$@" do firsttime=1 diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index ab14b97c942c..5f8fbb0d7c17 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -42,7 +42,7 @@ T=${TMPDIR-/tmp}/kvm-test-1-run.sh.$$ trap 'rm -rf $T' 0 mkdir $T -. $KVM/bin/functions.sh +. functions.sh . $CONFIGFRAG/ver_functions.sh config_template=${1} @@ -154,9 +154,7 @@ cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"` vcpus=`identify_qemu_vcpus` if test $cpu_count -gt $vcpus then - echo CPU count limited from $cpu_count to $vcpus - touch $resdir/Warnings - echo CPU count limited from $cpu_count to $vcpus >> $resdir/Warnings + echo CPU count limited from $cpu_count to $vcpus | tee -a $resdir/Warnings cpu_count=$vcpus fi qemu_args="`specify_qemu_cpus "$QEMU" "$qemu_args" "$cpu_count"`" @@ -179,8 +177,8 @@ then exit 0 fi echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log -echo $QEMU $qemu_args -m 512 -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd -( $QEMU $qemu_args -m 512 -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) & +echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd +( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) & commandcompleted=0 sleep 10 # Give qemu's pid a chance to reach the file if test -s "$resdir/qemu_pid" diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index ccd49e958fd2..56610dbbdf73 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -1,11 +1,8 @@ #!/bin/bash # -# Run a series of 14 tests under KVM. These are not particularly -# well-selected or well-tuned, but are the current set. Run from the -# top level of the source tree. -# -# Edit the definitions below to set the locations of the various directories, -# as well as the test duration. +# Run a series of tests under KVM. By default, this series is specified +# by the relevant CFLIST file, but can be overridden by the --configs +# command-line argument. # # Usage: kvm.sh [ options ] # @@ -34,6 +31,8 @@ T=${TMPDIR-/tmp}/kvm.sh.$$ trap 'rm -rf $T' 0 mkdir $T +cd `dirname $scriptname`/../../../../../ + dur=$((30*60)) dryrun="" KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM @@ -43,6 +42,7 @@ TORTURE_BOOT_IMAGE="" TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD TORTURE_KCONFIG_ARG="" TORTURE_KMAKE_ARG="" +TORTURE_QEMU_MEM=512 TORTURE_SHUTDOWN_GRACE=180 TORTURE_SUITE=rcu resdir="" @@ -69,8 +69,9 @@ usage () { echo " --kconfig Kconfig-options" echo " --kmake-arg kernel-make-arguments" echo " --mac nn:nn:nn:nn:nn:nn" + echo " --memory megabytes | nnnG" echo " --no-initrd" - echo " --qemu-args qemu-system-..." + echo " --qemu-args qemu-arguments" echo " --qemu-cmd qemu-system-..." echo " --results absolute-pathname" echo " --torture rcu" @@ -146,11 +147,16 @@ do TORTURE_QEMU_MAC=$2 shift ;; + --memory) + checkarg --memory "(memory size)" $# "$2" '^[0-9]\+[MG]\?$' error + TORTURE_QEMU_MEM=$2 + shift + ;; --no-initrd) TORTURE_INITRD=""; export TORTURE_INITRD ;; --qemu-args|--qemu-arg) - checkarg --qemu-args "-qemu args" $# "$2" '^-' '^error' + checkarg --qemu-args "(qemu arguments)" $# "$2" '^-' '^error' TORTURE_QEMU_ARG="$2" shift ;; @@ -173,6 +179,12 @@ do checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--' TORTURE_SUITE=$2 shift + if test "$TORTURE_SUITE" = rcuperf + then + # If you really want jitter for rcuperf, specify + # it after specifying rcuperf. (But why?) + jitter=0 + fi ;; *) echo Unknown argument $1 @@ -238,7 +250,6 @@ BEGIN { } END { - alldone = 0; batch = 0; nc = -1; @@ -288,6 +299,7 @@ TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC +TORTURE_QEMU_MEM="$TORTURE_QEMU_MEM"; export TORTURE_QEMU_MEM TORTURE_SHUTDOWN_GRACE="$TORTURE_SHUTDOWN_GRACE"; export TORTURE_SHUTDOWN_GRACE TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE if ! test -e $resdir @@ -331,8 +343,7 @@ awk < $T/cfgcpu.pack \ # Dump out the scripting required to run one test batch. function dump(first, pastlast, batchnum) { - print "echo ----Start batch " batchnum ": `date`"; - print "echo ----Start batch " batchnum ": `date` >> " rd "/log"; + print "echo ----Start batch " batchnum ": `date` | tee -a " rd "log"; print "needqemurun=" jn=1 for (j = first; j < pastlast; j++) { @@ -349,21 +360,18 @@ function dump(first, pastlast, batchnum) ovf = "-ovf"; else ovf = ""; - print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date`"; - print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` >> " rd "/log"; + print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log"; print "rm -f " builddir ".*"; print "touch " builddir ".wait"; print "mkdir " builddir " > /dev/null 2>&1 || :"; print "mkdir " rd cfr[jn] " || :"; print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &" - print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date`"; - print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` >> " rd "/log"; + print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log"; print "while test -f " builddir ".wait" print "do" print "\tsleep 1" print "done" - print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date`"; - print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date` >> " rd "/log"; + print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date` | tee -a " rd "log"; jn++; } for (j = 1; j < jn; j++) { @@ -371,8 +379,7 @@ function dump(first, pastlast, batchnum) print "rm -f " builddir ".ready" print "if test -f \"" rd cfr[j] "/builtkernel\"" print "then" - print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date`"; - print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date` >> " rd "/log"; + print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date` | tee -a " rd "log"; print "\tneedqemurun=1" print "fi" } @@ -386,31 +393,26 @@ function dump(first, pastlast, batchnum) njitter = ja[1]; if (TORTURE_BUILDONLY && njitter != 0) { njitter = 0; - print "echo Build-only run, so suppressing jitter >> " rd "/log" + print "echo Build-only run, so suppressing jitter | tee -a " rd "log" } if (TORTURE_BUILDONLY) { print "needqemurun=" } print "if test -n \"$needqemurun\"" print "then" - print "\techo ---- Starting kernels. `date`"; - print "\techo ---- Starting kernels. `date` >> " rd "/log"; + print "\techo ---- Starting kernels. `date` | tee -a " rd "log"; for (j = 0; j < njitter; j++) print "\tjitter.sh " j " " dur " " ja[2] " " ja[3] "&" print "\twait" - print "\techo ---- All kernel runs complete. `date`"; - print "\techo ---- All kernel runs complete. `date` >> " rd "/log"; + print "\techo ---- All kernel runs complete. `date` | tee -a " rd "log"; print "else" print "\twait" - print "\techo ---- No kernel runs. `date`"; - print "\techo ---- No kernel runs. `date` >> " rd "/log"; + print "\techo ---- No kernel runs. `date` | tee -a " rd "log"; print "fi" for (j = 1; j < jn; j++) { builddir=KVM "/b" j - print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results:"; - print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results: >> " rd "/log"; - print "cat " rd cfr[j] "/kvm-test-1-run.sh.out"; - print "cat " rd cfr[j] "/kvm-test-1-run.sh.out >> " rd "/log"; + print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results: | tee -a " rd "log"; + print "cat " rd cfr[j] "/kvm-test-1-run.sh.out | tee -a " rd "log"; } } diff --git a/tools/testing/selftests/rcutorture/bin/parse-torture.sh b/tools/testing/selftests/rcutorture/bin/parse-torture.sh index f12c38909b00..5987e50cfeb4 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-torture.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-torture.sh @@ -55,7 +55,7 @@ then exit fi -grep --binary-files=text 'torture:.*ver:' $file | grep --binary-files=text -v '(null)' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' | +grep --binary-files=text 'torture:.*ver:' $file | egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' | awk ' BEGIN { ver = 0; diff --git a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh index 252aae618984..80eb646e1319 100644 --- a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh @@ -38,6 +38,5 @@ per_version_boot_params () { echo $1 `locktorture_param_onoff "$1" "$2"` \ locktorture.stat_interval=15 \ locktorture.shutdown_secs=$3 \ - locktorture.torture_runnable=1 \ locktorture.verbose=1 } diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 index c70c51d5ded1..28568b72a31b 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 @@ -9,5 +9,4 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y -CONFIG_NO_HZ_FULL_ALL=y #CHECK#CONFIG_RCU_EXPERT=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot index cd2a188eeb6d..838297c58318 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot @@ -1 +1 @@ -rcutorture.torture_type=tasks +rcutorture.torture_type=tasks nohz_full=1 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 index 27d22695d64c..24c9f6012e35 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y -CONFIG_NO_HZ_FULL_ALL=y CONFIG_RCU_FAST_NO_HZ=y CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot index e34c33430447..e6071bb96c7d 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot @@ -1 +1 @@ -rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4 +rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4 nohz_full=1-7 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 index 0f4759f4232e..d7afb271a586 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y -CONFIG_NO_HZ_FULL_ALL=n CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh index ffb85ed786fa..24ec91041957 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh @@ -51,7 +51,6 @@ per_version_boot_params () { `rcutorture_param_n_barrier_cbs "$1"` \ rcutorture.stat_interval=15 \ rcutorture.shutdown_secs=$3 \ - rcutorture.torture_runnable=1 \ rcutorture.test_no_idle_hz=1 \ rcutorture.verbose=1 } diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh index 34f2a1b35ee5..d36b8fd6f0fc 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh @@ -20,33 +20,10 @@ # # Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> -# rcuperf_param_nreaders bootparam-string -# -# Adds nreaders rcuperf module parameter if not already specified. -rcuperf_param_nreaders () { - if ! echo "$1" | grep -q "rcuperf.nreaders" - then - echo rcuperf.nreaders=-1 - fi -} - -# rcuperf_param_nwriters bootparam-string -# -# Adds nwriters rcuperf module parameter if not already specified. -rcuperf_param_nwriters () { - if ! echo "$1" | grep -q "rcuperf.nwriters" - then - echo rcuperf.nwriters=-1 - fi -} - # per_version_boot_params bootparam-string config-file seconds # # Adds per-version torture-module parameters to kernels supporting them. per_version_boot_params () { - echo $1 `rcuperf_param_nreaders "$1"` \ - `rcuperf_param_nwriters "$1"` \ - rcuperf.perf_runnable=1 \ - rcuperf.shutdown=1 \ + echo $1 rcuperf.shutdown=1 \ rcuperf.verbose=1 } diff --git a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt index 66efb59a1bd1..449cf579d6f9 100644 --- a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt +++ b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt @@ -1,4 +1,4 @@ -This document describes one way to created the rcu-test-image file +This document describes one way to create the rcu-test-image file that contains the filesystem used by the guest-OS kernel. There are probably much better ways of doing this, and this filesystem could no doubt be smaller. It is probably also possible to simply download diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 24dbf634e2dd..168c66d74fc5 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -141,6 +141,15 @@ struct seccomp_data { #define SECCOMP_FILTER_FLAG_LOG 2 #endif +#ifndef PTRACE_SECCOMP_GET_METADATA +#define PTRACE_SECCOMP_GET_METADATA 0x420d + +struct seccomp_metadata { + __u64 filter_off; /* Input: which filter */ + __u64 flags; /* Output: filter's flags */ +}; +#endif + #ifndef seccomp int seccomp(unsigned int op, unsigned int flags, void *args) { @@ -1717,7 +1726,7 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, if (nr == __NR_getpid) change_syscall(_metadata, tracee, __NR_getppid); - if (nr == __NR_open) + if (nr == __NR_openat) change_syscall(_metadata, tracee, -1); } @@ -1792,7 +1801,7 @@ TEST_F(TRACE_syscall, ptrace_syscall_dropped) true); /* Tracer should skip the open syscall, resulting in EPERM. */ - EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_open)); + EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_openat)); } TEST_F(TRACE_syscall, syscall_allowed) @@ -2845,6 +2854,69 @@ TEST(get_action_avail) EXPECT_EQ(errno, EOPNOTSUPP); } +TEST(get_metadata) +{ + pid_t pid; + int pipefd[2]; + char buf; + struct seccomp_metadata md; + long ret; + + ASSERT_EQ(0, pipe(pipefd)); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) { + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + /* one with log, one without */ + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, + SECCOMP_FILTER_FLAG_LOG, &prog)); + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); + + ASSERT_EQ(0, close(pipefd[0])); + ASSERT_EQ(1, write(pipefd[1], "1", 1)); + ASSERT_EQ(0, close(pipefd[1])); + + while (1) + sleep(100); + } + + ASSERT_EQ(0, close(pipefd[1])); + ASSERT_EQ(1, read(pipefd[0], &buf, 1)); + + ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid)); + ASSERT_EQ(pid, waitpid(pid, NULL, 0)); + + /* Past here must not use ASSERT or child process is never killed. */ + + md.filter_off = 0; + errno = 0; + ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); + EXPECT_EQ(sizeof(md), ret) { + if (errno == EINVAL) + XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)"); + } + + EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); + EXPECT_EQ(md.filter_off, 0); + + md.filter_off = 1; + ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); + EXPECT_EQ(sizeof(md), ret); + EXPECT_EQ(md.flags, 0); + EXPECT_EQ(md.filter_off, 1); + +skip: + ASSERT_EQ(0, kill(pid, SIGKILL)); +} + /* * TODO: * - add microbenchmarks diff --git a/tools/testing/selftests/sync/Makefile b/tools/testing/selftests/sync/Makefile index b3c8ba3cb668..d0121a8a3523 100644 --- a/tools/testing/selftests/sync/Makefile +++ b/tools/testing/selftests/sync/Makefile @@ -30,7 +30,7 @@ $(TEST_CUSTOM_PROGS): $(TESTS) $(OBJS) $(CC) -o $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) $(CFLAGS) $(LDFLAGS) $(OBJS): $(OUTPUT)/%.o: %.c - $(CC) -c $^ -o $@ + $(CC) -c $^ -o $@ $(CFLAGS) $(TESTS): $(OUTPUT)/%.o: %.c $(CC) -c $^ -o $@ diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README index 970ff294fec8..3a0336782d2d 100644 --- a/tools/testing/selftests/tc-testing/README +++ b/tools/testing/selftests/tc-testing/README @@ -14,11 +14,11 @@ REQUIREMENTS * The kernel must have network namespace support -* The kernel must have veth support available, as a veth pair is created +* The kernel must have veth support available, as a veth pair is created prior to running the tests. -* All tc-related features must be built in or available as modules. - To check what is required in current setup run: +* All tc-related features being tested must be built in or available as + modules. To check what is required in current setup run: ./tdc.py -c Note: @@ -44,10 +44,13 @@ using the -p option when running tdc: RUNNING TDC ----------- -To use tdc, root privileges are required. tdc will not run otherwise. +To use tdc, root privileges are required. This is because the +commands being tested must be run as root. The code that enforces +execution by root uid has been moved into a plugin (see PLUGIN +ARCHITECTURE, below). -All tests are executed inside a network namespace to prevent conflicts -within the host. +If nsPlugin is linked, all tests are executed inside a network +namespace to prevent conflicts within the host. Running tdc without any arguments will run all tests. Refer to the section on command line arguments for more information, or run: @@ -59,6 +62,33 @@ output captured from the failing test will be printed immediately following the failed test in the TAP output. +OVERVIEW OF TDC EXECUTION +------------------------- + +One run of tests is considered a "test suite" (this will be refined in the +future). A test suite has one or more test cases in it. + +A test case has four stages: + + - setup + - execute + - verify + - teardown + +The setup and teardown stages can run zero or more commands. The setup +stage does some setup if the test needs it. The teardown stage undoes +the setup and returns the system to a "neutral" state so any other test +can be run next. These two stages require any commands run to return +success, but do not otherwise verify the results. + +The execute and verify stages each run one command. The execute stage +tests the return code against one or more acceptable values. The +verify stage checks the return code for success, and also compares +the stdout with a regular expression. + +Each of the commands in any stage will run in a shell instance. + + USER-DEFINED CONSTANTS ---------------------- @@ -70,23 +100,132 @@ executed as part of the test. More will be added as test cases require. Example: $TC qdisc add dev $DEV1 ingress +The NAMES values are used to substitute into the commands in the test cases. + COMMAND LINE ARGUMENTS ---------------------- Run tdc.py -h to see the full list of available arguments. --p PATH Specify the tc executable located at PATH to be used on this - test run --c Show the available test case categories in this test file --c CATEGORY Run only tests that belong to CATEGORY --f FILE Read test cases from the JSON file named FILE --l [CATEGORY] List all test cases in the JSON file. If CATEGORY is - specified, list test cases matching that category. --s ID Show the test case matching ID --e ID Execute the test case identified by ID --i Generate unique ID numbers for test cases with no existing - ID number +usage: tdc.py [-h] [-p PATH] [-D DIR [DIR ...]] [-f FILE [FILE ...]] + [-c [CATG [CATG ...]]] [-e ID [ID ...]] [-l] [-s] [-i] [-v] + [-d DEVICE] [-n NS] [-V] + +Linux TC unit tests + +optional arguments: + -h, --help show this help message and exit + -p PATH, --path PATH The full path to the tc executable to use + -v, --verbose Show the commands that are being run + -d DEVICE, --device DEVICE + Execute the test case in flower category + +selection: + select which test cases: files plus directories; filtered by categories + plus testids + + -D DIR [DIR ...], --directory DIR [DIR ...] + Collect tests from the specified directory(ies) + (default [tc-tests]) + -f FILE [FILE ...], --file FILE [FILE ...] + Run tests from the specified file(s) + -c [CATG [CATG ...]], --category [CATG [CATG ...]] + Run tests only from the specified category/ies, or if + no category/ies is/are specified, list known + categories. + -e ID [ID ...], --execute ID [ID ...] + Execute the specified test cases with specified IDs + +action: + select action to perform on selected test cases + + -l, --list List all test cases, or those only within the + specified category + -s, --show Display the selected test cases + -i, --id Generate ID numbers for new test cases + +netns: + options for nsPlugin(run commands in net namespace) + + -n NS, --namespace NS + Run commands in namespace NS + +valgrind: + options for valgrindPlugin (run command under test under Valgrind) + + -V, --valgrind Run commands under valgrind + + +PLUGIN ARCHITECTURE +------------------- + +There is now a plugin architecture, and some of the functionality that +was in the tdc.py script has been moved into the plugins. + +The plugins are in the directory plugin-lib. The are executed from +directory plugins. Put symbolic links from plugins to plugin-lib, +and name them according to the order you want them to run. + +Example: + +bjb@bee:~/work/tc-testing$ ls -l plugins +total 4 +lrwxrwxrwx 1 bjb bjb 27 Oct 4 16:12 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py +lrwxrwxrwx 1 bjb bjb 25 Oct 12 17:55 20-nsPlugin.py -> ../plugin-lib/nsPlugin.py +-rwxr-xr-x 1 bjb bjb 0 Sep 29 15:56 __init__.py + +The plugins are a subclass of TdcPlugin, defined in TdcPlugin.py and +must be called "SubPlugin" so tdc can find them. They are +distinguished from each other in the python program by their module +name. + +This base class supplies "hooks" to run extra functions. These hooks are as follows: + +pre- and post-suite +pre- and post-case +pre- and post-execute stage +adjust-command (runs in all stages and receives the stage name) + +The pre-suite hook receives the number of tests and an array of test ids. +This allows you to dump out the list of skipped tests in the event of a +failure during setup or teardown stage. + +The pre-case hook receives the ordinal number and test id of the current test. + +The adjust-command hook receives the stage id (see list below) and the +full command to be executed. This allows for last-minute adjustment +of the command. + +The stages are identified by the following strings: + + - pre (pre-suite) + - setup + - command + - verify + - teardown + - post (post-suite) + + +To write a plugin, you need to inherit from TdcPlugin in +TdcPlugin.py. To use the plugin, you have to put the +implementation file in plugin-lib, and add a symbolic link to it from +plugins. It will be detected at run time and invoked at the +appropriate times. There are a few examples in the plugin-lib +directory: + + - rootPlugin.py: + implements the enforcement of running as root + - nsPlugin.py: + sets up a network namespace and runs all commands in that namespace + - valgrindPlugin.py + runs each command in the execute stage under valgrind, + and checks for leaks. + This plugin will output an extra test for each test in the test file, + one is the existing output as to whether the test passed or failed, + and the other is a test whether the command leaked memory or not. + (This one is a preliminary version, it may not work quite right yet, + but the overall template is there and it should only need tweaks.) ACKNOWLEDGEMENTS diff --git a/tools/testing/selftests/tc-testing/TODO.txt b/tools/testing/selftests/tc-testing/TODO.txt index 6a266d811a78..c40698557e2f 100644 --- a/tools/testing/selftests/tc-testing/TODO.txt +++ b/tools/testing/selftests/tc-testing/TODO.txt @@ -5,6 +5,27 @@ tc Testing Suite To-Do list: - Add support for multiple versions of tc to run successively -- Improve error messages when tdc aborts its run +- Improve error messages when tdc aborts its run. Partially done - still + need to better handle problems in pre- and post-suite. -- Allow tdc to write its results to file +- Use python logger module for debug/verbose output + +- Allow tdc to write its results to file. + Maybe use python logger module for this too. + +- A better implementation of the "hooks". Currently, every plugin + will attempt to run a function at every hook point. Could be + changed so that plugin __init__ methods will register functions to + be run in the various predefined times. Then if a plugin does not + require action at a specific point, no penalty will be paid for + trying to run a function that will do nothing. + +- Proper exception handling - make an exception class and use it + +- a TestCase class, for easier testcase handling, searching, comparison + +- a TestSuite class + and a way to configure a test suite, + to automate running multiple "test suites" with different requirements + +- super simple test case example using ls, touch, etc diff --git a/tools/testing/selftests/tc-testing/TdcPlugin.py b/tools/testing/selftests/tc-testing/TdcPlugin.py new file mode 100644 index 000000000000..3ee9a6dacb52 --- /dev/null +++ b/tools/testing/selftests/tc-testing/TdcPlugin.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 + +class TdcPlugin: + def __init__(self): + super().__init__() + print(' -- {}.__init__'.format(self.sub_class)) + + def pre_suite(self, testcount, testidlist): + '''run commands before test_runner goes into a test loop''' + self.testcount = testcount + self.testidlist = testidlist + if self.args.verbose > 1: + print(' -- {}.pre_suite'.format(self.sub_class)) + + def post_suite(self, index): + '''run commands after test_runner completes the test loop + index is the last ordinal number of test that was attempted''' + if self.args.verbose > 1: + print(' -- {}.post_suite'.format(self.sub_class)) + + def pre_case(self, test_ordinal, testid): + '''run commands before test_runner does one test''' + if self.args.verbose > 1: + print(' -- {}.pre_case'.format(self.sub_class)) + self.args.testid = testid + self.args.test_ordinal = test_ordinal + + def post_case(self): + '''run commands after test_runner does one test''' + if self.args.verbose > 1: + print(' -- {}.post_case'.format(self.sub_class)) + + def pre_execute(self): + '''run command before test-runner does the execute step''' + if self.args.verbose > 1: + print(' -- {}.pre_execute'.format(self.sub_class)) + + def post_execute(self): + '''run command after test-runner does the execute step''' + if self.args.verbose > 1: + print(' -- {}.post_execute'.format(self.sub_class)) + + def adjust_command(self, stage, command): + '''adjust the command''' + if self.args.verbose > 1: + print(' -- {}.adjust_command {}'.format(self.sub_class, stage)) + + # if stage == 'pre': + # pass + # elif stage == 'setup': + # pass + # elif stage == 'execute': + # pass + # elif stage == 'verify': + # pass + # elif stage == 'teardown': + # pass + # elif stage == 'post': + # pass + # else: + # pass + + return command + + def add_args(self, parser): + '''Get the plugin args from the command line''' + self.argparser = parser + return self.argparser + + def check_args(self, args, remaining): + '''Check that the args are set correctly''' + self.args = args + if self.args.verbose > 1: + print(' -- {}.check_args'.format(self.sub_class)) diff --git a/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt new file mode 100644 index 000000000000..c18f88d09360 --- /dev/null +++ b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt @@ -0,0 +1,104 @@ +tdc - Adding plugins for tdc + +Author: Brenda J. Butler - bjb@mojatatu.com + +ADDING PLUGINS +-------------- + +A new plugin should be written in python as a class that inherits from TdcPlugin. +There are some examples in plugin-lib. + +The plugin can be used to add functionality to the test framework, +such as: + +- adding commands to be run before and/or after the test suite +- adding commands to be run before and/or after the test cases +- adding commands to be run before and/or after the execute phase of the test cases +- ability to alter the command to be run in any phase: + pre (the pre-suite stage) + prepare + execute + verify + teardown + post (the post-suite stage) +- ability to add to the command line args, and use them at run time + + +The functions in the class should follow the following interfaces: + + def __init__(self) + def pre_suite(self, testcount, testidlist) # see "PRE_SUITE" below + def post_suite(self, ordinal) # see "SKIPPING" below + def pre_case(self, test_ordinal, testid) # see "PRE_CASE" below + def post_case(self) + def pre_execute(self) + def post_execute(self) + def adjust_command(self, stage, command) # see "ADJUST" below + def add_args(self, parser) # see "ADD_ARGS" below + def check_args(self, args, remaining) # see "CHECK_ARGS" below + + +PRE_SUITE + +This method takes a testcount (number of tests to be run) and +testidlist (array of test ids for tests that will be run). This is +useful for various things, including when an exception occurs and the +rest of the tests must be skipped. The info is stored in the object, +and the post_suite method can refer to it when dumping the "skipped" +TAP output. The tdc.py script will do that for the test suite as +defined in the test case, but if the plugin is being used to run extra +tests on each test (eg, check for memory leaks on associated +co-processes) then that other tap output can be generated in the +post-suite method using this info passed in to the pre_suite method. + + +SKIPPING + +The post_suite method will receive the ordinal number of the last +test to be attempted. It can use this info when outputting +the TAP output for the extra test cases. + + +PRE_CASE + +The pre_case method will receive the ordinal number of the test +and the test id. Useful for outputing the extra test results. + + +ADJUST + +The adjust_command method receives a string representing +the execution stage and a string which is the actual command to be +executed. The plugin can adjust the command, based on the stage of +execution. + +The stages are represented by the following strings: + + 'pre' + 'setup' + 'command' + 'verify' + 'teardown' + 'post' + +The adjust_command method must return the adjusted command so tdc +can use it. + + +ADD_ARGS + +The add_args method receives the argparser object and can add +arguments to it. Care should be taken that the new arguments do not +conflict with any from tdc.py or from other plugins that will be used +concurrently. + +The add_args method should return the argparser object. + + +CHECK_ARGS + +The check_args method is so that the plugin can do validation on +the args, if needed. If there is a problem, and Exception should +be raised, with a string that explains the problem. + +eg: raise Exception('plugin xxx, arg -y is wrong, fix it') diff --git a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt index 00438331ba47..17b267dedbd9 100644 --- a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt +++ b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt @@ -12,14 +12,18 @@ template.json for the required JSON format for test cases. Include the 'id' field, but do not assign a value. Running tdc with the -i option will generate a unique ID for that test case. -tdc will recursively search the 'tc' subdirectory for .json files. Any -test case files you create in these directories will automatically be included. -If you wish to store your custom test cases elsewhere, be sure to run tdc -with the -f argument and the path to your file. +tdc will recursively search the 'tc-tests' subdirectory (or the +directories named with the -D option) for .json files. Any test case +files you create in these directories will automatically be included. +If you wish to store your custom test cases elsewhere, be sure to run +tdc with the -f argument and the path to your file, or the -D argument +and the path to your directory(ies). -Be aware of required escape characters in the JSON data - particularly when -defining the match pattern. Refer to the tctests.json file for examples when -in doubt. +Be aware of required escape characters in the JSON data - particularly +when defining the match pattern. Refer to the supplied json test files +for examples when in doubt. The match pattern is written in json, and +will be used by python. So the match pattern will be a python regular +expression, but should be written using json syntax. TEST CASE STRUCTURE @@ -69,7 +73,8 @@ SETUP/TEARDOWN ERRORS If an error is detected during the setup/teardown process, execution of the tests will immediately stop with an error message and the namespace in which the tests are run will be destroyed. This is to prevent inaccurate results -in the test cases. +in the test cases. tdc will output a series of TAP results for the skipped +tests. Repeated failures of the setup/teardown may indicate a problem with the test case, or possibly even a bug in one of the commands that are not being tested. @@ -79,3 +84,17 @@ so that it doesn't halt the script for an error that doesn't matter. Turn the individual command into a list, with the command being first, followed by all acceptable exit codes for the command. +Example: + +A pair of setup commands. The first can have exit code 0, 1 or 255, the +second must have exit code 0. + + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action reclassify index 65536" + ], diff --git a/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS new file mode 100644 index 000000000000..aa8a2669702b --- /dev/null +++ b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS @@ -0,0 +1,27 @@ +tdc.py will look for plugins in a directory plugins off the cwd. +Make a set of numbered symbolic links from there to the actual plugins. +Eg: + +tdc.py +plugin-lib/ +plugins/ + __init__.py + 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py + 20-valgrindPlugin.py -> ../plugin-lib/valgrindPlugin.py + 30-nsPlugin.py -> ../plugin-lib/nsPlugin.py + + +tdc.py will find them and use them. + + +rootPlugin + Check if the uid is root. If not, bail out. + +valgrindPlugin + Run the command under test with valgrind, and produce an extra set of TAP results for the memory tests. + This plugin will write files to the cwd, called vgnd-xxx.log. These will contain + the valgrind output for test xxx. Any file matching the glob 'vgnd-*.log' will be + deleted at the end of the run. + +nsPlugin + Run all the commands in a network namespace. diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py new file mode 100644 index 000000000000..a194b1af2b30 --- /dev/null +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -0,0 +1,141 @@ +import os +import signal +from string import Template +import subprocess +import time +from TdcPlugin import TdcPlugin + +from tdc_config import * + +class SubPlugin(TdcPlugin): + def __init__(self): + self.sub_class = 'ns/SubPlugin' + super().__init__() + + def pre_suite(self, testcount, testidlist): + '''run commands before test_runner goes into a test loop''' + super().pre_suite(testcount, testidlist) + + if self.args.namespace: + self._ns_create() + + def post_suite(self, index): + '''run commands after test_runner goes into a test loop''' + super().post_suite(index) + if self.args.verbose: + print('{}.post_suite'.format(self.sub_class)) + + if self.args.namespace: + self._ns_destroy() + + def add_args(self, parser): + super().add_args(parser) + self.argparser_group = self.argparser.add_argument_group( + 'netns', + 'options for nsPlugin(run commands in net namespace)') + self.argparser_group.add_argument( + '-n', '--namespace', action='store_true', + help='Run commands in namespace') + return self.argparser + + def adjust_command(self, stage, command): + super().adjust_command(stage, command) + cmdform = 'list' + cmdlist = list() + + if not self.args.namespace: + return command + + if self.args.verbose: + print('{}.adjust_command'.format(self.sub_class)) + + if not isinstance(command, list): + cmdform = 'str' + cmdlist = command.split() + else: + cmdlist = command + if stage == 'setup' or stage == 'execute' or stage == 'verify' or stage == 'teardown': + if self.args.verbose: + print('adjust_command: stage is {}; inserting netns stuff in command [{}] list [{}]'.format(stage, command, cmdlist)) + cmdlist.insert(0, self.args.NAMES['NS']) + cmdlist.insert(0, 'exec') + cmdlist.insert(0, 'netns') + cmdlist.insert(0, 'ip') + else: + pass + + if cmdform == 'str': + command = ' '.join(cmdlist) + else: + command = cmdlist + + if self.args.verbose: + print('adjust_command: return command [{}]'.format(command)) + return command + + def _ns_create(self): + ''' + Create the network namespace in which the tests will be run and set up + the required network devices for it. + ''' + if self.args.namespace: + cmd = 'ip netns add {}'.format(self.args.NAMES['NS']) + self._exec_cmd('pre', cmd) + cmd = 'ip link add $DEV0 type veth peer name $DEV1' + self._exec_cmd('pre', cmd) + cmd = 'ip link set $DEV1 netns {}'.format(self.args.NAMES['NS']) + self._exec_cmd('pre', cmd) + cmd = 'ip link set $DEV0 up' + self._exec_cmd('pre', cmd) + cmd = 'ip -n {} link set $DEV1 up'.format(self.args.NAMES['NS']) + self._exec_cmd('pre', cmd) + if self.args.device: + cmd = 'ip link set $DEV2 netns {}'.format(self.args.NAMES['NS']) + self._exec_cmd('pre', cmd) + cmd = 'ip -n {} link set $DEV2 up'.format(self.args.NAMES['NS']) + self._exec_cmd('pre', cmd) + + def _ns_destroy(self): + ''' + Destroy the network namespace for testing (and any associated network + devices as well) + ''' + if self.args.namespace: + cmd = 'ip netns delete {}'.format(self.args.NAMES['NS']) + self._exec_cmd('post', cmd) + + def _exec_cmd(self, stage, command): + ''' + Perform any required modifications on an executable command, then run + it in a subprocess and return the results. + ''' + if '$' in command: + command = self._replace_keywords(command) + + self.adjust_command(stage, command) + if self.args.verbose: + print('_exec_cmd: command "{}"'.format(command)) + proc = subprocess.Popen(command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=ENVIR) + (rawout, serr) = proc.communicate() + + if proc.returncode != 0 and len(serr) > 0: + foutput = serr.decode("utf-8") + else: + foutput = rawout.decode("utf-8") + + proc.stdout.close() + proc.stderr.close() + return proc, foutput + + def _replace_keywords(self, cmd): + """ + For a given executable command, substitute any known + variables contained within NAMES with the correct values + """ + tcmd = Template(cmd) + subcmd = tcmd.safe_substitute(self.args.NAMES) + return subcmd diff --git a/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py new file mode 100644 index 000000000000..e36775bd4d12 --- /dev/null +++ b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py @@ -0,0 +1,19 @@ +import os +import sys +from TdcPlugin import TdcPlugin + +from tdc_config import * + + +class SubPlugin(TdcPlugin): + def __init__(self): + self.sub_class = 'root/SubPlugin' + super().__init__() + + def pre_suite(self, testcount, testidlist): + # run commands before test_runner goes into a test loop + super().pre_suite(testcount, testidlist) + + if os.geteuid(): + print('This script must be run with root privileges', file=sys.stderr) + exit(1) diff --git a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py new file mode 100644 index 000000000000..477a7bd7d7fb --- /dev/null +++ b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py @@ -0,0 +1,142 @@ +''' +run the command under test, under valgrind and collect memory leak info +as a separate test. +''' + + +import os +import re +import signal +from string import Template +import subprocess +import time +from TdcPlugin import TdcPlugin + +from tdc_config import * + +def vp_extract_num_from_string(num_as_string_maybe_with_commas): + return int(num_as_string_maybe_with_commas.replace(',','')) + +class SubPlugin(TdcPlugin): + def __init__(self): + self.sub_class = 'valgrind/SubPlugin' + self.tap = '' + super().__init__() + + def pre_suite(self, testcount, testidlist): + '''run commands before test_runner goes into a test loop''' + super().pre_suite(testcount, testidlist) + if self.args.verbose > 1: + print('{}.pre_suite'.format(self.sub_class)) + if self.args.valgrind: + self._add_to_tap('1..{}\n'.format(self.testcount)) + + def post_suite(self, index): + '''run commands after test_runner goes into a test loop''' + super().post_suite(index) + self._add_to_tap('\n|---\n') + if self.args.verbose > 1: + print('{}.post_suite'.format(self.sub_class)) + print('{}'.format(self.tap)) + if self.args.verbose < 4: + subprocess.check_output('rm -f vgnd-*.log', shell=True) + + def add_args(self, parser): + super().add_args(parser) + self.argparser_group = self.argparser.add_argument_group( + 'valgrind', + 'options for valgrindPlugin (run command under test under Valgrind)') + + self.argparser_group.add_argument( + '-V', '--valgrind', action='store_true', + help='Run commands under valgrind') + + return self.argparser + + def adjust_command(self, stage, command): + super().adjust_command(stage, command) + cmdform = 'list' + cmdlist = list() + + if not self.args.valgrind: + return command + + if self.args.verbose > 1: + print('{}.adjust_command'.format(self.sub_class)) + + if not isinstance(command, list): + cmdform = 'str' + cmdlist = command.split() + else: + cmdlist = command + + if stage == 'execute': + if self.args.verbose > 1: + print('adjust_command: stage is {}; inserting valgrind stuff in command [{}] list [{}]'. + format(stage, command, cmdlist)) + cmdlist.insert(0, '--track-origins=yes') + cmdlist.insert(0, '--show-leak-kinds=definite,indirect') + cmdlist.insert(0, '--leak-check=full') + cmdlist.insert(0, '--log-file=vgnd-{}.log'.format(self.args.testid)) + cmdlist.insert(0, '-v') # ask for summary of non-leak errors + cmdlist.insert(0, ENVIR['VALGRIND_BIN']) + else: + pass + + if cmdform == 'str': + command = ' '.join(cmdlist) + else: + command = cmdlist + + if self.args.verbose > 1: + print('adjust_command: return command [{}]'.format(command)) + return command + + def post_execute(self): + if not self.args.valgrind: + return + + self.definitely_lost_re = re.compile( + r'definitely lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\sblocks', re.MULTILINE | re.DOTALL) + self.indirectly_lost_re = re.compile( + r'indirectly lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL) + self.possibly_lost_re = re.compile( + r'possibly lost:\s+([,0-9]+)bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL) + self.non_leak_error_re = re.compile( + r'ERROR SUMMARY:\s+([,0-9]+) errors from\s+([,0-9]+)\s+contexts', re.MULTILINE | re.DOTALL) + + def_num = 0 + ind_num = 0 + pos_num = 0 + nle_num = 0 + + # what about concurrent test runs? Maybe force them to be in different directories? + with open('vgnd-{}.log'.format(self.args.testid)) as vfd: + content = vfd.read() + def_mo = self.definitely_lost_re.search(content) + ind_mo = self.indirectly_lost_re.search(content) + pos_mo = self.possibly_lost_re.search(content) + nle_mo = self.non_leak_error_re.search(content) + + if def_mo: + def_num = int(def_mo.group(2)) + if ind_mo: + ind_num = int(ind_mo.group(2)) + if pos_mo: + pos_num = int(pos_mo.group(2)) + if nle_mo: + nle_num = int(nle_mo.group(1)) + + mem_results = '' + if (def_num > 0) or (ind_num > 0) or (pos_num > 0) or (nle_num > 0): + mem_results += 'not ' + + mem_results += 'ok {} - {}-mem # {}\n'.format( + self.args.test_ordinal, self.args.testid, 'memory leak check') + self._add_to_tap(mem_results) + if mem_results.startswith('not '): + print('{}'.format(content)) + self._add_to_tap(content) + + def _add_to_tap(self, more_tap_output): + self.tap += more_tap_output diff --git a/tools/testing/selftests/tc-testing/plugins/__init__.py b/tools/testing/selftests/tc-testing/plugins/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/testing/selftests/tc-testing/plugins/__init__.py diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json new file mode 100644 index 000000000000..5b012f4981d4 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -0,0 +1,289 @@ +[ + { + "id": "d959", + "name": "Add cBPF action with valid bytecode", + "category": [ + "actions", + "bpf" + ], + "setup": [ + [ + "$TC action flush action bpf", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 100", + "expExitCode": "0", + "verifyCmd": "$TC action get action bpf index 100", + "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 100 ref", + "matchCount": "1", + "teardown": [ + "$TC action flush action bpf" + ] + }, + { + "id": "f84a", + "name": "Add cBPF action with invalid bytecode", + "category": [ + "actions", + "bpf" + ], + "setup": [ + [ + "$TC actions flush action bpf", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0' index 100", + "expExitCode": "255", + "verifyCmd": "$TC action get action bpf index 100", + "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 100 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action bpf" + ] + }, + { + "id": "e939", + "name": "Add eBPF action with valid object-file", + "category": [ + "actions", + "bpf" + ], + "setup": [ + "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { return 2; }' | clang -O2 -x c -c - -target bpf -o _b.o", + [ + "$TC action flush action bpf", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667", + "expExitCode": "0", + "verifyCmd": "$TC action get action bpf index 667", + "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c default-action pipe.*index 667 ref", + "matchCount": "1", + "teardown": [ + "$TC action flush action bpf", + "rm -f _b.o" + ] + }, + { + "id": "282d", + "name": "Add eBPF action with invalid object-file", + "category": [ + "actions", + "bpf" + ], + "setup": [ + "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { s->data = 0x0; return 2; }' | clang -O2 -x c -c - -target bpf -o _c.o", + [ + "$TC action flush action bpf", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667", + "expExitCode": "255", + "verifyCmd": "$TC action get action bpf index 667", + "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9].*index 667 ref", + "matchCount": "0", + "teardown": [ + "$TC action flush action bpf", + "rm -f _c.o" + ] + }, + { + "id": "d819", + "name": "Replace cBPF bytecode and action control", + "category": [ + "actions", + "bpf" + ], + "setup": [ + [ + "$TC actions flush action bpf", + 0, + 1, + 255 + ], + [ + "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 555", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC action replace action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 555", + "expExitCode": "0", + "verifyCmd": "$TC action get action bpf index 555", + "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' default-action drop.*index 555 ref", + "matchCount": "1", + "teardown": [ + "$TC action flush action bpf" + ] + }, + { + "id": "6ae3", + "name": "Delete cBPF action ", + "category": [ + "actions", + "bpf" + ], + "setup": [ + [ + "$TC actions flush action bpf", + 0, + 1, + 255 + ], + [ + "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 444", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC action delete action bpf index 444", + "expExitCode": "0", + "verifyCmd": "$TC action get action bpf index 444", + "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 444 ref", + "matchCount": "0", + "teardown": [ + "$TC action flush action bpf" + ] + }, + { + "id": "3e0d", + "name": "List cBPF actions", + "category": [ + "actions", + "bpf" + ], + "setup": [ + [ + "$TC action flush action bpf", + 0, + 1, + 255 + ], + "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' ok index 101", + "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 102", + "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 33024,6 0 0 262144,6 0 0 0' continue index 103" + ], + "cmdUnderTest": "$TC action list action bpf", + "expExitCode": "0", + "verifyCmd": "$TC action list action bpf", + "matchPattern": "action order [0-9]*: bpf bytecode", + "matchCount": "3", + "teardown": [ + "$TC actions flush action bpf" + ] + }, + { + "id": "55ce", + "name": "Flush BPF actions", + "category": [ + "actions", + "bpf" + ], + "setup": [ + [ + "$TC actions flush action bpf", + 0, + 1, + 255 + ], + "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' ok index 101", + "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 102", + "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 33024,6 0 0 262144,6 0 0 0' continue index 103" + ], + "cmdUnderTest": "$TC action flush action bpf", + "expExitCode": "0", + "verifyCmd": "$TC action list action bpf", + "matchPattern": "action order [0-9]*: bpf bytecode", + "matchCount": "0", + "teardown": [ + "$TC actions flush action bpf" + ] + }, + { + "id": "ccc3", + "name": "Add cBPF action with duplicate index", + "category": [ + "actions", + "bpf" + ], + "setup": [ + [ + "$TC actions flush action bpf", + 0, + 1, + 255 + ], + "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 4294967295" + ], + "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967295", + "expExitCode": "255", + "verifyCmd": "$TC action get action bpf index 4294967295", + "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 4294967295", + "matchCount": "1", + "teardown": [ + "$TC action flush action bpf" + ] + }, + { + "id": "89c7", + "name": "Add cBPF action with invalid index", + "category": [ + "actions", + "bpf" + ], + "setup": [ + [ + "$TC actions flush action bpf", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967296 cookie 12345", + "expExitCode": "255", + "verifyCmd": "$TC action ls action bpf", + "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*cookie 12345", + "matchCount": "0", + "teardown": [ + "$TC action flush action bpf" + ] + }, + { + "id": "7ab9", + "name": "Add cBPF action with cookie", + "category": [ + "actions", + "bpf" + ], + "setup": [ + [ + "$TC actions flush action bpf", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' cookie d0d0d0d0d0d0d0d0", + "expExitCode": "0", + "verifyCmd": "$TC action list action bpf", + "matchPattern": "action order [0-9]*: bpf.*cookie d0d0d0d0d0d0d0", + "matchCount": "1", + "teardown": [ + "$TC action flush action bpf" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json new file mode 100644 index 000000000000..70952bd98ff9 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json @@ -0,0 +1,291 @@ +[ + { + "id": "2002", + "name": "Add valid connmark action with defaults", + "category": [ + "actions", + "connmark" + ], + "setup": [ + [ + "$TC actions flush action connmark", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action connmark", + "expExitCode": "0", + "verifyCmd": "$TC actions list action connmark", + "matchPattern": "action order [0-9]+: connmark zone 0 pipe", + "matchCount": "1", + "teardown": [ + "$TC actions flush action connmark" + ] + }, + { + "id": "56a5", + "name": "Add valid connmark action with control pass", + "category": [ + "actions", + "connmark" + ], + "setup": [ + [ + "$TC actions flush action connmark", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action connmark pass index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action connmark index 1", + "matchPattern": "action order [0-9]+: connmark zone 0 pass.*index 1 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action connmark" + ] + }, + { + "id": "7c66", + "name": "Add valid connmark action with control drop", + "category": [ + "actions", + "connmark" + ], + "setup": [ + [ + "$TC actions flush action connmark", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action connmark drop index 100", + "expExitCode": "0", + "verifyCmd": "$TC actions get action connmark index 100", + "matchPattern": "action order [0-9]+: connmark zone 0 drop.*index 100 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action connmark" + ] + }, + { + "id": "a913", + "name": "Add valid connmark action with control pipe", + "category": [ + "actions", + "connmark" + ], + "setup": [ + [ + "$TC actions flush action connmark", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action connmark pipe index 455", + "expExitCode": "0", + "verifyCmd": "$TC actions get action connmark index 455", + "matchPattern": "action order [0-9]+: connmark zone 0 pipe.*index 455 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action connmark" + ] + }, + { + "id": "bdd8", + "name": "Add valid connmark action with control reclassify", + "category": [ + "actions", + "connmark" + ], + "setup": [ + [ + "$TC actions flush action connmark", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action connmark reclassify index 7", + "expExitCode": "0", + "verifyCmd": "$TC actions list action connmark", + "matchPattern": "action order [0-9]+: connmark zone 0 reclassify.*index 7 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action connmark" + ] + }, + { + "id": "b8be", + "name": "Add valid connmark action with control continue", + "category": [ + "actions", + "connmark" + ], + "setup": [ + [ + "$TC actions flush action connmark", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action connmark continue index 17", + "expExitCode": "0", + "verifyCmd": "$TC actions list action connmark", + "matchPattern": "action order [0-9]+: connmark zone 0 continue.*index 17 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action connmark" + ] + }, + { + "id": "d8a6", + "name": "Add valid connmark action with control jump", + "category": [ + "actions", + "connmark" + ], + "setup": [ + [ + "$TC actions flush action connmark", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action connmark jump 10 index 17", + "expExitCode": "0", + "verifyCmd": "$TC actions list action connmark", + "matchPattern": "action order [0-9]+: connmark zone 0 jump 10.*index 17 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action connmark" + ] + }, + { + "id": "aae8", + "name": "Add valid connmark action with zone argument", + "category": [ + "actions", + "connmark" + ], + "setup": [ + [ + "$TC actions flush action connmark", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action connmark zone 100 pipe index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action connmark index 1", + "matchPattern": "action order [0-9]+: connmark zone 100 pipe.*index 1 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action connmark" + ] + }, + { + "id": "2f0b", + "name": "Add valid connmark action with invalid zone argument", + "category": [ + "actions", + "connmark" + ], + "setup": [ + [ + "$TC actions flush action connmark", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action connmark zone 65536 reclassify index 21", + "expExitCode": "255", + "verifyCmd": "$TC actions get action connmark index 1", + "matchPattern": "action order [0-9]+: connmark zone 65536 reclassify.*index 21 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action connmark" + ] + }, + { + "id": "9305", + "name": "Add connmark action with unsupported argument", + "category": [ + "actions", + "connmark" + ], + "setup": [ + [ + "$TC actions flush action connmark", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action connmark zone 655 unsupp_arg pass index 2", + "expExitCode": "255", + "verifyCmd": "$TC actions get action connmark index 2", + "matchPattern": "action order [0-9]+: connmark zone 655 unsupp_arg pass.*index 2 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action connmark" + ] + }, + { + "id": "71ca", + "name": "Add valid connmark action and replace it", + "category": [ + "actions", + "connmark" + ], + "setup": [ + [ + "$TC actions flush action connmark", + 0, + 1, + 255 + ], + "$TC actions add action connmark zone 777 pass index 555" + ], + "cmdUnderTest": "$TC actions replace action connmark zone 555 reclassify index 555", + "expExitCode": "0", + "verifyCmd": "$TC actions get action connmark index 555", + "matchPattern": "action order [0-9]+: connmark zone 555 reclassify.*index 555 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action connmark" + ] + }, + { + "id": "5f8f", + "name": "Add valid connmark action with cookie", + "category": [ + "actions", + "connmark" + ], + "setup": [ + [ + "$TC actions flush action connmark", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action connmark zone 555 pipe index 5 cookie aabbccddeeff112233445566778800a1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action connmark index 5", + "matchPattern": "action order [0-9]+: connmark zone 555 pipe.*index 5 ref.*cookie aabbccddeeff112233445566778800a1", + "matchCount": "1", + "teardown": [ + "$TC actions flush action connmark" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json new file mode 100644 index 000000000000..93cf8fea8ae7 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json @@ -0,0 +1,410 @@ +[ + { + "id": "6d84", + "name": "Add csum iph action", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action csum iph index 800", + "expExitCode": "0", + "verifyCmd": "$TC actions get action csum index 800", + "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 800 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "1862", + "name": "Add csum ip4h action", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action csum ip4h index 7", + "expExitCode": "0", + "verifyCmd": "$TC actions get action csum index 7", + "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 7 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "15c6", + "name": "Add csum ipv4h action", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action csum ipv4h index 1122", + "expExitCode": "0", + "verifyCmd": "$TC actions get action csum index 1122", + "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 1122 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "bf47", + "name": "Add csum icmp action", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action csum icmp index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action csum index 1", + "matchPattern": "action order [0-9]*: csum \\(icmp\\) action pass.*index 1 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "cc1d", + "name": "Add csum igmp action", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action csum igmp index 999", + "expExitCode": "0", + "verifyCmd": "$TC actions get action csum index 999", + "matchPattern": "action order [0-9]*: csum \\(igmp\\) action pass.*index 999 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "bccc", + "name": "Add csum foobar action", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action csum foobar index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action csum", + "matchPattern": "action order [0-9]*: csum \\(foobar\\) action pass.*index 1 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "3bb4", + "name": "Add csum tcp action", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action csum tcp index 9999", + "expExitCode": "0", + "verifyCmd": "$TC actions get action csum index 9999", + "matchPattern": "action order [0-9]*: csum \\(tcp\\) action pass.*index 9999 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "759c", + "name": "Add csum udp action", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action csum udp index 334455", + "expExitCode": "0", + "verifyCmd": "$TC actions get action csum index 334455", + "matchPattern": "action order [0-9]*: csum \\(udp\\) action pass.*index 334455 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "bdb6", + "name": "Add csum udp xor iph action", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action csum udp xor iph index 3", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action csum", + "matchPattern": "action order [0-9]*: csum \\(udp xor iph\\) action pass.*index 3 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "c220", + "name": "Add csum udplite action", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action csum udplite continue index 3", + "expExitCode": "0", + "verifyCmd": "$TC actions get action csum index 3", + "matchPattern": "action order [0-9]*: csum \\(udplite\\) action continue.*index 3 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "8993", + "name": "Add csum sctp action", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action csum sctp index 777", + "expExitCode": "0", + "verifyCmd": "$TC actions get action csum index 777", + "matchPattern": "action order [0-9]*: csum \\(sctp\\) action pass.*index 777 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "b138", + "name": "Add csum ip & icmp action", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action csum ip and icmp pipe index 123", + "expExitCode": "0", + "verifyCmd": "$TC actions get action csum index 123", + "matchPattern": "action order [0-9]*: csum \\(iph, icmp\\) action pipe.*index 123 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "eeda", + "name": "Add csum ip & sctp action", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action csum ipv4h sctp continue index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action csum index 2", + "matchPattern": "action order [0-9]*: csum \\(iph, sctp\\) action continue.*index 2 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "0017", + "name": "Add csum udp or tcp action", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action csum udp or tcp continue index 27", + "expExitCode": "0", + "verifyCmd": "$TC actions get action csum index 27", + "matchPattern": "action order [0-9]*: csum \\(tcp, udp\\) action continue.*index 27 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "ce92", + "name": "Add csum udp action with cookie", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action csum udp pipe index 7 cookie 12345678", + "expExitCode": "0", + "verifyCmd": "$TC actions get action csum index 7", + "matchPattern": "action order [0-9]*: csum \\(udp\\) action pipe.*index 7.*cookie 12345678", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "912f", + "name": "Add csum icmp action with large cookie", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action csum icmp pipe index 17 cookie aabbccddeeff1122", + "expExitCode": "0", + "verifyCmd": "$TC actions get action csum index 17", + "matchPattern": "action order [0-9]*: csum \\(icmp\\) action pipe.*index 17.*cookie aabbccddeeff1122", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] + }, + { + "id": "879b", + "name": "Add batch of 32 csum tcp actions", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "for i in `seq 1 32`; do cmd=\"action csum tcp continue index $i \"; args=\"$args$cmd\"; done && $TC actions add $args", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action csum", + "matchPattern": "^[ \t]+index [0-9]* ref", + "matchCount": "32", + "teardown": [ + "$TC actions flush action csum" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json index e2187b6e0b7a..68c91023cdb9 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json @@ -465,5 +465,76 @@ "teardown": [ "$TC actions flush action gact" ] + }, + { + "id": "1021", + "name": "Add batch of 32 gact pass actions", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action pass index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "^[ \t]+index [0-9]+ ref", + "matchCount": "32", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "da7a", + "name": "Add batch of 32 gact continue actions with cookie", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action continue index \\$i cookie aabbccddeeff112233445566778800a1 \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "^[ \t]+index [0-9]+ ref", + "matchCount": "32", + "teardown": [ + "$TC actions flush action gact" + ] + }, + { + "id": "8aa3", + "name": "Delete batch of 32 gact continue actions", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action continue index \\$i \\\"; args=\\\"\\$args\\$cmd\\\"; done && $TC actions add \\$args\"" + ], + "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action gact index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "^[ \t]+index [0-9]+ ref", + "matchCount": "0", + "teardown": [] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json index 0fcccf18399b..443c9b3c8664 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -171,6 +171,198 @@ ] }, { + "id": "8917", + "name": "Add mirred mirror action with control pass", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pass index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action mirred index 1", + "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pass.*index 1 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "1054", + "name": "Add mirred mirror action with control pipe", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 15", + "expExitCode": "0", + "verifyCmd": "$TC actions get action mirred index 15", + "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 15 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "9887", + "name": "Add mirred mirror action with control continue", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo continue index 15", + "expExitCode": "0", + "verifyCmd": "$TC actions get action mirred index 15", + "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) continue.*index 15 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "e4aa", + "name": "Add mirred mirror action with control reclassify", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo reclassify index 150", + "expExitCode": "0", + "verifyCmd": "$TC actions get action mirred index 150", + "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) reclassify.*index 150 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "ece9", + "name": "Add mirred mirror action with control drop", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo drop index 99", + "expExitCode": "0", + "verifyCmd": "$TC actions get action mirred index 99", + "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) drop.*index 99 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "0031", + "name": "Add mirred mirror action with control jump", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo jump 10 index 99", + "expExitCode": "0", + "verifyCmd": "$TC actions get action mirred index 99", + "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) jump 10.*index 99 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "407c", + "name": "Add mirred mirror action with cookie", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo reclassify cookie aa11bb22cc33dd44ee55", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action mirred", + "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) reclassify.*cookie aa11bb22cc33dd44ee55", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { + "id": "8b69", + "name": "Add mirred mirror action with maximum index", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 4294967295", + "expExitCode": "0", + "verifyCmd": "$TC actions get action mirred index 4294967295", + "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 4294967295", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { "id": "a70e", "name": "Delete mirred mirror action", "category": [ diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json index 0e602a3f9393..38d85a1d7492 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json @@ -265,6 +265,150 @@ ] }, { + "id": "ddd6", + "name": "Add police action with invalid rate value", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 3tb burst 250k conform-exceed pass/pipe index 5", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x5 rate 3Tb burst 250Kb mtu 2Kb action pass/pipe", + "matchCount": "0", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "f61c", + "name": "Add police action with invalid burst value", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 3kbit burst 250P conform-exceed pass/pipe index 5", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x5 rate 3Kbit burst 250Pb mtu 2Kb action pass/pipe", + "matchCount": "0", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "c26f", + "name": "Add police action with invalid peakrate value", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 90kbit burst 10k mtu 2kb peakrate 100T index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x1 rate 90Kbit burst 10Kb mtu 2Kb peakrate 100Tbit", + "matchCount": "0", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "db04", + "name": "Add police action with invalid mtu value", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 10kbit burst 10k mtu 2Pbit index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x1 rate 10Kbit burst 1Kb mtu 2Pb", + "matchCount": "0", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "f3c9", + "name": "Add police action with cookie", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 1 cookie a1b1c1d1e1f12233bb", + "expExitCode": "0", + "verifyCmd": "$TC actions get action police index 1", + "matchPattern": "action order [0-9]*: police 0x1 rate 10Mbit burst 10Kb mtu 2Kb.*cookie a1b1c1d1e1f12233bb", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] + }, + { + "id": "d190", + "name": "Add police action with maximum index", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 4294967295", + "expExitCode": "0", + "verifyCmd": "$TC actions get action mirred index 4294967295", + "matchPattern": "action order [0-9]*: police 0xffffffff rate 10Mbit burst 10Kb mtu 2Kb", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] + }, + { "id": "336e", "name": "Delete police action", "category": [ diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json index 99635ea4722e..37ecc2716fee 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json @@ -216,6 +216,174 @@ ] }, { + "id": "464a", + "name": "Add skbedit action with control pipe", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit ptype host pipe index 11", + "expExitCode": "0", + "verifyCmd": "$TC actions get action skbedit index 11", + "matchPattern": "action order [0-9]*: skbedit ptype host pipe.*index 11 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "212f", + "name": "Add skbedit action with control reclassify", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit mark 56789 reclassify index 90", + "expExitCode": "0", + "verifyCmd": "$TC actions get action skbedit index 90", + "matchPattern": "action order [0-9]*: skbedit mark 56789 reclassify.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "0651", + "name": "Add skbedit action with control pass", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit queue_mapping 3 pass index 271", + "expExitCode": "0", + "verifyCmd": "$TC actions get action skbedit index 271", + "matchPattern": "action order [0-9]*: skbedit queue_mapping 3 pass.*index 271 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "cc53", + "name": "Add skbedit action with control drop", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit queue_mapping 3 drop index 271", + "expExitCode": "0", + "verifyCmd": "$TC actions get action skbedit index 271", + "matchPattern": "action order [0-9]*: skbedit queue_mapping 3 drop.*index 271 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "ec16", + "name": "Add skbedit action with control jump", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit priority 8 jump 9 index 2", + "expExitCode": "0", + "verifyCmd": "$TC actions get action skbedit index 2", + "matchPattern": "action order [0-9]*: skbedit priority :8 jump 9.*index 2 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "db54", + "name": "Add skbedit action with control continue", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit priority 16 continue index 32", + "expExitCode": "0", + "verifyCmd": "$TC actions get action skbedit index 32", + "matchPattern": "action order [0-9]*: skbedit priority :16 continue.*index 32 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "1055", + "name": "Add skbedit action with cookie", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit priority 16 continue index 32 cookie deadbeef", + "expExitCode": "0", + "verifyCmd": "$TC actions get action skbedit index 32", + "matchPattern": "action order [0-9]*: skbedit priority :16 continue.*index 32 ref.*cookie deadbeef", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { "id": "5172", "name": "List skbedit actions", "category": [ diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json index e34075059c26..fe3326e939c1 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json @@ -264,6 +264,30 @@ ] }, { + "id": "6046", + "name": "Add skbmod action with control reclassify and cookie", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbmod set smac 00:01:02:03:04:01 reclassify index 1 cookie ddeeffaabb11cc22", + "expExitCode": "0", + "verifyCmd": "$TC actions get action skbmod index 1", + "matchPattern": "action order [0-9]*: skbmod reclassify set smac 00:01:02:03:04:01.*index 1 ref.*cookie ddeeffaabb11cc22", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbmod" + ] + }, + { "id": "58cb", "name": "List skbmod actions", "category": [ @@ -315,7 +339,7 @@ "cmdUnderTest": "$TC actions ls action skbmod", "expExitCode": "0", "verifyCmd": "$TC actions get action skbmod index 4", - "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x0031", + "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x31", "matchCount": "1", "teardown": [ "$TC actions flush action skbmod" diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json new file mode 100644 index 000000000000..4510ddfa6e54 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json @@ -0,0 +1,410 @@ +[ + { + "id": "6f5a", + "name": "Add vlan pop action", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan pop index 8", + "expExitCode": "0", + "verifyCmd": "$TC actions list action vlan", + "matchPattern": "action order [0-9]+: vlan.*pop.*index 8 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "ee6f", + "name": "Add vlan pop action with large index", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan pop index 4294967295", + "expExitCode": "0", + "verifyCmd": "$TC actions list action vlan", + "matchPattern": "action order [0-9]+: vlan.*pop.*index 4294967295 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "b6b9", + "name": "Add vlan pop action with jump opcode", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan pop jump 10 index 8", + "expExitCode": "0", + "verifyCmd": "$TC actions list action vlan", + "matchPattern": "action order [0-9]+: vlan.*jump 10.*index 8 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "87c3", + "name": "Add vlan pop action with trap opcode", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan pop trap index 8", + "expExitCode": "0", + "verifyCmd": "$TC actions list action vlan", + "matchPattern": "action order [0-9]+: vlan.*pop trap.*index 8 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "2b91", + "name": "Add vlan invalid action", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan bad_mode", + "expExitCode": "255", + "verifyCmd": "$TC actions list action vlan", + "matchPattern": "action order [0-9]+: vlan.*bad_mode", + "matchCount": "0", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "57fc", + "name": "Add vlan action with invalid protocol type", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan push protocol ABCD", + "expExitCode": "255", + "verifyCmd": "$TC actions list action vlan", + "matchPattern": "action order [0-9]+: vlan.*push", + "matchCount": "0", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "3989", + "name": "Add vlan push action with default protocol and priority", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan push id 123 index 18", + "expExitCode": "0", + "verifyCmd": "$TC actions get action vlan index 18", + "matchPattern": "action order [0-9]+: vlan.*push id 123 protocol 802.1Q priority 0 pipe.*index 18 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "79dc", + "name": "Add vlan push action with protocol 802.1Q and priority 3", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan push id 77 protocol 802.1Q priority 3 continue index 734", + "expExitCode": "0", + "verifyCmd": "$TC actions get action vlan index 734", + "matchPattern": "action order [0-9]+: vlan.*push id 77 protocol 802.1Q priority 3 continue.*index 734 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "4d73", + "name": "Add vlan push action with protocol 802.1AD", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan push id 1024 protocol 802.1AD pass index 10000", + "expExitCode": "0", + "verifyCmd": "$TC actions get action vlan index 10000", + "matchPattern": "action order [0-9]+: vlan.*push id 1024 protocol 802.1ad priority 0 pass.*index 10000 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "1f7b", + "name": "Add vlan push action with invalid vlan ID", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan push id 5678 index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions list action vlan", + "matchPattern": "action order [0-9]+: vlan.*push id 5678.*index 1 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "5d02", + "name": "Add vlan push action with invalid IEEE 802.1p priority", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan push id 5 priority 10 index 1", + "expExitCode": "255", + "verifyCmd": "$TC actions list action vlan", + "matchPattern": "action order [0-9]+: vlan.*push id 5.*index 1 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "6812", + "name": "Add vlan modify action for protocol 802.1Q", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 index 100", + "expExitCode": "0", + "verifyCmd": "$TC actions get action vlan index 100", + "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q priority 0 pipe.*index 100 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "5a31", + "name": "Add vlan modify action for protocol 802.1AD", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1ad id 500 reclassify index 12", + "expExitCode": "0", + "verifyCmd": "$TC actions get action vlan index 12", + "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad priority 0 reclassify.*index 12 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { + "id": "83a4", + "name": "Delete vlan pop action", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ], + "$TC actions add action vlan pop index 44" + ], + "cmdUnderTest": "$TC actions del action vlan index 44", + "expExitCode": "0", + "verifyCmd": "$TC actions list action vlan", + "matchPattern": "action order [0-9]+: vlan.*pop.*index 44 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "ed1e", + "name": "Delete vlan push action for protocol 802.1Q", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ], + "$TC actions add action vlan push id 4094 protocol 802.1Q index 999" + ], + "cmdUnderTest": "$TC actions del action vlan index 999", + "expExitCode": "0", + "verifyCmd": "$TC actions list action vlan", + "matchPattern": "action order [0-9]+: vlan.*push id 4094 protocol 802.1Q priority 0 pipe.*index 999 ref", + "matchCount": "0", + "teardown": [] + }, + { + "id": "a2a3", + "name": "Flush vlan actions", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ], + "$TC actions add action vlan push id 4 protocol 802.1ad index 10", + "$TC actions add action vlan push id 4 protocol 802.1ad index 11", + "$TC actions add action vlan push id 4 protocol 802.1ad index 12", + "$TC actions add action vlan push id 4 protocol 802.1ad index 13" + ], + "cmdUnderTest": "$TC actions flush action vlan", + "expExitCode": "0", + "verifyCmd": "$TC actions list action vlan", + "matchPattern": "action order [0-9]+: vlan.*push id 4 protocol 802.1ad", + "matchCount": "0", + "teardown": [] + }, + { + "id": "1d78", + "name": "Add vlan action with cookie", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan push id 4 cookie a0a0a0a0a0a0a0", + "expExitCode": "0", + "verifyCmd": "$TC actions list action vlan", + "matchPattern": "action order [0-9]+: vlan.*push id 4.*cookie a0a0a0a0a0a0a0", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index fc373fdf2bdc..87a04a8a5945 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -11,16 +11,96 @@ import re import os import sys import argparse +import importlib import json import subprocess +import time +import traceback from collections import OrderedDict from string import Template from tdc_config import * from tdc_helper import * - -USE_NS = True +import TdcPlugin + + +class PluginMgrTestFail(Exception): + def __init__(self, stage, output, message): + self.stage = stage + self.output = output + self.message = message + +class PluginMgr: + def __init__(self, argparser): + super().__init__() + self.plugins = {} + self.plugin_instances = [] + self.args = [] + self.argparser = argparser + + # TODO, put plugins in order + plugindir = os.getenv('TDC_PLUGIN_DIR', './plugins') + for dirpath, dirnames, filenames in os.walk(plugindir): + for fn in filenames: + if (fn.endswith('.py') and + not fn == '__init__.py' and + not fn.startswith('#') and + not fn.startswith('.#')): + mn = fn[0:-3] + foo = importlib.import_module('plugins.' + mn) + self.plugins[mn] = foo + self.plugin_instances.append(foo.SubPlugin()) + + def call_pre_suite(self, testcount, testidlist): + for pgn_inst in self.plugin_instances: + pgn_inst.pre_suite(testcount, testidlist) + + def call_post_suite(self, index): + for pgn_inst in reversed(self.plugin_instances): + pgn_inst.post_suite(index) + + def call_pre_case(self, test_ordinal, testid): + for pgn_inst in self.plugin_instances: + try: + pgn_inst.pre_case(test_ordinal, testid) + except Exception as ee: + print('exception {} in call to pre_case for {} plugin'. + format(ee, pgn_inst.__class__)) + print('test_ordinal is {}'.format(test_ordinal)) + print('testid is {}'.format(testid)) + raise + + def call_post_case(self): + for pgn_inst in reversed(self.plugin_instances): + pgn_inst.post_case() + + def call_pre_execute(self): + for pgn_inst in self.plugin_instances: + pgn_inst.pre_execute() + + def call_post_execute(self): + for pgn_inst in reversed(self.plugin_instances): + pgn_inst.post_execute() + + def call_add_args(self, parser): + for pgn_inst in self.plugin_instances: + parser = pgn_inst.add_args(parser) + return parser + + def call_check_args(self, args, remaining): + for pgn_inst in self.plugin_instances: + pgn_inst.check_args(args, remaining) + + def call_adjust_command(self, stage, command): + for pgn_inst in self.plugin_instances: + command = pgn_inst.adjust_command(stage, command) + return command + + @staticmethod + def _make_argparser(args): + self.argparser = argparse.ArgumentParser( + description='Linux TC unit tests') def replace_keywords(cmd): @@ -33,21 +113,24 @@ def replace_keywords(cmd): return subcmd -def exec_cmd(command, nsonly=True): +def exec_cmd(args, pm, stage, command): """ Perform any required modifications on an executable command, then run it in a subprocess and return the results. """ - if (USE_NS and nsonly): - command = 'ip netns exec $NS ' + command - + if len(command.strip()) == 0: + return None, None if '$' in command: command = replace_keywords(command) + command = pm.call_adjust_command(stage, command) + if args.verbose > 0: + print('command "{}"'.format(command)) proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + stderr=subprocess.PIPE, + env=ENVIR) (rawout, serr) = proc.communicate() if proc.returncode != 0 and len(serr) > 0: @@ -60,36 +143,99 @@ def exec_cmd(command, nsonly=True): return proc, foutput -def prepare_env(cmdlist): +def prepare_env(args, pm, stage, prefix, cmdlist, output = None): """ - Execute the setup/teardown commands for a test case. Optionally - terminate test execution if the command fails. + Execute the setup/teardown commands for a test case. + Optionally terminate test execution if the command fails. """ + if args.verbose > 0: + print('{}'.format(prefix)) for cmdinfo in cmdlist: - if (type(cmdinfo) == list): + if isinstance(cmdinfo, list): exit_codes = cmdinfo[1:] cmd = cmdinfo[0] else: exit_codes = [0] cmd = cmdinfo - if (len(cmd) == 0): + if not cmd: continue - (proc, foutput) = exec_cmd(cmd) + (proc, foutput) = exec_cmd(args, pm, stage, cmd) + + if proc and (proc.returncode not in exit_codes): + print('', file=sys.stderr) + print("{} *** Could not execute: \"{}\"".format(prefix, cmd), + file=sys.stderr) + print("\n{} *** Error message: \"{}\"".format(prefix, foutput), + file=sys.stderr) + print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr) + print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr) + print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr) + raise PluginMgrTestFail( + stage, output, + '"{}" did not complete successfully'.format(prefix)) + +def run_one_test(pm, args, index, tidx): + global NAMES + result = True + tresult = "" + tap = "" + if args.verbose > 0: + print("\t====================\n=====> ", end="") + print("Test " + tidx["id"] + ": " + tidx["name"]) + + # populate NAMES with TESTID for this test + NAMES['TESTID'] = tidx['id'] + + pm.call_pre_case(index, tidx['id']) + prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"]) + + if (args.verbose > 0): + print('-----> execute stage') + pm.call_pre_execute() + (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"]) + exit_code = p.returncode + pm.call_post_execute() + + if (exit_code != int(tidx["expExitCode"])): + result = False + print("exit:", exit_code, int(tidx["expExitCode"])) + print(procout) + else: + if args.verbose > 0: + print('-----> verify stage') + match_pattern = re.compile( + str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE) + (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"]) + if procout: + match_index = re.findall(match_pattern, procout) + if len(match_index) != int(tidx["matchCount"]): + result = False + elif int(tidx["matchCount"]) != 0: + result = False + + if not result: + tresult += 'not ' + tresult += 'ok {} - {} # {}\n'.format(str(index), tidx['id'], tidx['name']) + tap += tresult - if proc.returncode not in exit_codes: - print - print("Could not execute:") - print(cmd) - print("\nError message:") - print(foutput) - print("\nAborting test run.") - ns_destroy() - exit(1) + if result == False: + if procout: + tap += procout + else: + tap += 'No output!\n' + + prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout) + pm.call_post_case() + index += 1 + + # remove TESTID from NAMES + del(NAMES['TESTID']) + return tap -def test_runner(filtered_tests, args): +def test_runner(pm, args, filtered_tests): """ Driver function for the unit tests. @@ -101,75 +247,92 @@ def test_runner(filtered_tests, args): testlist = filtered_tests tcount = len(testlist) index = 1 - tap = str(index) + ".." + str(tcount) + "\n" - + tap = '' + badtest = None + stage = None + emergency_exit = False + emergency_exit_message = '' + + if args.notap: + if args.verbose: + tap = 'notap requested: omitting test plan\n' + else: + tap = str(index) + ".." + str(tcount) + "\n" + try: + pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist]) + except Exception as ee: + ex_type, ex, ex_tb = sys.exc_info() + print('Exception {} {} (caught in pre_suite).'. + format(ex_type, ex)) + # when the extra print statements are uncommented, + # the traceback does not appear between them + # (it appears way earlier in the tdc.py output) + # so don't bother ... + # print('--------------------(') + # print('traceback') + traceback.print_tb(ex_tb) + # print('--------------------)') + emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex) + emergency_exit = True + stage = 'pre-SUITE' + + if emergency_exit: + pm.call_post_suite(index) + return emergency_exit_message + if args.verbose > 1: + print('give test rig 2 seconds to stabilize') + time.sleep(2) for tidx in testlist: - result = True - tresult = "" if "flower" in tidx["category"] and args.device == None: + if args.verbose > 1: + print('Not executing test {} {} because DEV2 not defined'. + format(tidx['id'], tidx['name'])) continue - print("Test " + tidx["id"] + ": " + tidx["name"]) - prepare_env(tidx["setup"]) - (p, procout) = exec_cmd(tidx["cmdUnderTest"]) - exit_code = p.returncode - - if (exit_code != int(tidx["expExitCode"])): - result = False - print("exit:", exit_code, int(tidx["expExitCode"])) - print(procout) - else: - match_pattern = re.compile(str(tidx["matchPattern"]), re.DOTALL) - (p, procout) = exec_cmd(tidx["verifyCmd"]) - match_index = re.findall(match_pattern, procout) - if len(match_index) != int(tidx["matchCount"]): - result = False - - if result == True: - tresult += "ok " - else: - tresult += "not ok " - tap += tresult + str(index) + " " + tidx["id"] + " " + tidx["name"] + "\n" - - if result == False: - tap += procout - - prepare_env(tidx["teardown"]) + try: + badtest = tidx # in case it goes bad + tap += run_one_test(pm, args, index, tidx) + except PluginMgrTestFail as pmtf: + ex_type, ex, ex_tb = sys.exc_info() + stage = pmtf.stage + message = pmtf.message + output = pmtf.output + print(message) + print('Exception {} {} (caught in test_runner, running test {} {} {} stage {})'. + format(ex_type, ex, index, tidx['id'], tidx['name'], stage)) + print('---------------') + print('traceback') + traceback.print_tb(ex_tb) + print('---------------') + if stage == 'teardown': + print('accumulated output for this test:') + if pmtf.output: + print(pmtf.output) + print('---------------') + break index += 1 - return tap - + # if we failed in setup or teardown, + # fill in the remaining tests with ok-skipped + count = index + if not args.notap: + tap += 'about to flush the tap output if tests need to be skipped\n' + if tcount + 1 != index: + for tidx in testlist[index - 1:]: + msg = 'skipped - previous {} failed'.format(stage) + tap += 'ok {} - {} # {} {} {}\n'.format( + count, tidx['id'], msg, index, badtest.get('id', '--Unknown--')) + count += 1 -def ns_create(): - """ - Create the network namespace in which the tests will be run and set up - the required network devices for it. - """ - if (USE_NS): - cmd = 'ip netns add $NS' - exec_cmd(cmd, False) - cmd = 'ip link add $DEV0 type veth peer name $DEV1' - exec_cmd(cmd, False) - cmd = 'ip link set $DEV1 netns $NS' - exec_cmd(cmd, False) - cmd = 'ip link set $DEV0 up' - exec_cmd(cmd, False) - cmd = 'ip -n $NS link set $DEV1 up' - exec_cmd(cmd, False) - cmd = 'ip link set $DEV2 netns $NS' - exec_cmd(cmd, False) - cmd = 'ip -n $NS link set $DEV2 up' - exec_cmd(cmd, False) + tap += 'done flushing skipped test tap output\n' + if args.pause: + print('Want to pause\nPress enter to continue ...') + if input(sys.stdin): + print('got something on stdin') -def ns_destroy(): - """ - Destroy the network namespace for testing (and any associated network - devices as well) - """ - if (USE_NS): - cmd = 'ip netns delete $NS' - exec_cmd(cmd, False) + pm.call_post_suite(index) + return tap def has_blank_ids(idlist): """ @@ -209,41 +372,70 @@ def set_args(parser): """ Set the command line arguments for tdc. """ - parser.add_argument('-p', '--path', type=str, - help='The full path to the tc executable to use') - parser.add_argument('-c', '--category', type=str, nargs='?', const='+c', - help='Run tests only from the specified category, or if no category is specified, list known categories.') - parser.add_argument('-f', '--file', type=str, - help='Run tests from the specified file') - parser.add_argument('-l', '--list', type=str, nargs='?', const="++", metavar='CATEGORY', - help='List all test cases, or those only within the specified category') - parser.add_argument('-s', '--show', type=str, nargs=1, metavar='ID', dest='showID', - help='Display the test case with specified id') - parser.add_argument('-e', '--execute', type=str, nargs=1, metavar='ID', - help='Execute the single test case with specified ID') - parser.add_argument('-i', '--id', action='store_true', dest='gen_id', - help='Generate ID numbers for new test cases') + parser.add_argument( + '-p', '--path', type=str, + help='The full path to the tc executable to use') + sg = parser.add_argument_group( + 'selection', 'select which test cases: ' + + 'files plus directories; filtered by categories plus testids') + ag = parser.add_argument_group( + 'action', 'select action to perform on selected test cases') + + sg.add_argument( + '-D', '--directory', nargs='+', metavar='DIR', + help='Collect tests from the specified directory(ies) ' + + '(default [tc-tests])') + sg.add_argument( + '-f', '--file', nargs='+', metavar='FILE', + help='Run tests from the specified file(s)') + sg.add_argument( + '-c', '--category', nargs='*', metavar='CATG', default=['+c'], + help='Run tests only from the specified category/ies, ' + + 'or if no category/ies is/are specified, list known categories.') + sg.add_argument( + '-e', '--execute', nargs='+', metavar='ID', + help='Execute the specified test cases with specified IDs') + ag.add_argument( + '-l', '--list', action='store_true', + help='List all test cases, or those only within the specified category') + ag.add_argument( + '-s', '--show', action='store_true', dest='showID', + help='Display the selected test cases') + ag.add_argument( + '-i', '--id', action='store_true', dest='gen_id', + help='Generate ID numbers for new test cases') + parser.add_argument( + '-v', '--verbose', action='count', default=0, + help='Show the commands that are being run') + parser.add_argument( + '-N', '--notap', action='store_true', + help='Suppress tap results for command under test') parser.add_argument('-d', '--device', help='Execute the test case in flower category') + parser.add_argument( + '-P', '--pause', action='store_true', + help='Pause execution just before post-suite stage') return parser -def check_default_settings(args): +def check_default_settings(args, remaining, pm): """ - Process any arguments overriding the default settings, and ensure the - settings are correct. + Process any arguments overriding the default settings, + and ensure the settings are correct. """ # Allow for overriding specific settings global NAMES if args.path != None: - NAMES['TC'] = args.path + NAMES['TC'] = args.path if args.device != None: - NAMES['DEV2'] = args.device + NAMES['DEV2'] = args.device if not os.path.isfile(NAMES['TC']): print("The specified tc path " + NAMES['TC'] + " does not exist.") exit(1) + pm.call_check_args(args, remaining) + def get_id_list(alltests): """ @@ -277,7 +469,7 @@ def generate_case_ids(alltests): for c in alltests: if (c["id"] == ""): while True: - newid = str('%04x' % random.randrange(16**4)) + newid = str('{:04x}'.format(random.randrange(16**4))) if (does_id_exist(alltests, newid)): continue else: @@ -298,42 +490,110 @@ def generate_case_ids(alltests): testlist.append(t) outfile = open(f, "w") json.dump(testlist, outfile, indent=4) + outfile.write("\n") outfile.close() +def filter_tests_by_id(args, testlist): + ''' + Remove tests from testlist that are not in the named id list. + If id list is empty, return empty list. + ''' + newlist = list() + if testlist and args.execute: + target_ids = args.execute + + if isinstance(target_ids, list) and (len(target_ids) > 0): + newlist = list(filter(lambda x: x['id'] in target_ids, testlist)) + return newlist + +def filter_tests_by_category(args, testlist): + ''' + Remove tests from testlist that are not in a named category. + ''' + answer = list() + if args.category and testlist: + test_ids = list() + for catg in set(args.category): + if catg == '+c': + continue + print('considering category {}'.format(catg)) + for tc in testlist: + if catg in tc['category'] and tc['id'] not in test_ids: + answer.append(tc) + test_ids.append(tc['id']) + + return answer def get_test_cases(args): """ If a test case file is specified, retrieve tests from that file. Otherwise, glob for all json files in subdirectories and load from each one. + Also, if requested, filter by category, and add tests matching + certain ids. """ import fnmatch - if args.file != None: - if not os.path.isfile(args.file): - print("The specified test case file " + args.file + " does not exist.") - exit(1) - flist = [args.file] - else: - flist = [] - for root, dirnames, filenames in os.walk('tc-tests'): + + flist = [] + testdirs = ['tc-tests'] + + if args.file: + # at least one file was specified - remove the default directory + testdirs = [] + + for ff in args.file: + if not os.path.isfile(ff): + print("IGNORING file " + ff + "\n\tBECAUSE does not exist.") + else: + flist.append(os.path.abspath(ff)) + + if args.directory: + testdirs = args.directory + + for testdir in testdirs: + for root, dirnames, filenames in os.walk(testdir): for filename in fnmatch.filter(filenames, '*.json'): - flist.append(os.path.join(root, filename)) - alltests = list() + candidate = os.path.abspath(os.path.join(root, filename)) + if candidate not in testdirs: + flist.append(candidate) + + alltestcases = list() for casefile in flist: - alltests = alltests + (load_from_file(casefile)) - return alltests + alltestcases = alltestcases + (load_from_file(casefile)) + + allcatlist = get_test_categories(alltestcases) + allidlist = get_id_list(alltestcases) + testcases_by_cats = get_categorized_testlist(alltestcases, allcatlist) + idtestcases = filter_tests_by_id(args, alltestcases) + cattestcases = filter_tests_by_category(args, alltestcases) -def set_operation_mode(args): + cat_ids = [x['id'] for x in cattestcases] + if args.execute: + if args.category: + alltestcases = cattestcases + [x for x in idtestcases if x['id'] not in cat_ids] + else: + alltestcases = idtestcases + else: + if cat_ids: + alltestcases = cattestcases + else: + # just accept the existing value of alltestcases, + # which has been filtered by file/directory + pass + + return allcatlist, allidlist, testcases_by_cats, alltestcases + + +def set_operation_mode(pm, args): """ Load the test case data and process remaining arguments to determine what the script should do for this run, and call the appropriate function. """ - alltests = get_test_cases(args) + ucat, idlist, testcases, alltests = get_test_cases(args) if args.gen_id: - idlist = get_id_list(alltests) if (has_blank_ids(idlist)): alltests = generate_case_ids(alltests) else: @@ -347,70 +607,29 @@ def set_operation_mode(args): print("Please correct them before continuing.") exit(1) - ucat = get_test_categories(alltests) - if args.showID: - show_test_case_by_id(alltests, args.showID[0]) + for atest in alltests: + print_test_case(atest) exit(0) - if args.execute: - target_id = args.execute[0] - else: - target_id = "" - - if args.category: - if (args.category == '+c'): - print("Available categories:") - print_sll(ucat) - exit(0) - else: - target_category = args.category - else: - target_category = "" - - - testcases = get_categorized_testlist(alltests, ucat) + if isinstance(args.category, list) and (len(args.category) == 0): + print("Available categories:") + print_sll(ucat) + exit(0) if args.list: - if (args.list == "++"): + if args.list: list_test_cases(alltests) exit(0) - elif(len(args.list) > 0): - if (args.list not in ucat): - print("Unknown category " + args.list) - print("Available categories:") - print_sll(ucat) - exit(1) - list_test_cases(testcases[args.list]) - exit(0) - - if (os.geteuid() != 0): - print("This script must be run with root privileges.\n") - exit(1) - - ns_create() - - if (len(target_category) == 0): - if (len(target_id) > 0): - alltests = list(filter(lambda x: target_id in x['id'], alltests)) - if (len(alltests) == 0): - print("Cannot find a test case with ID matching " + target_id) - exit(1) - catresults = test_runner(alltests, args) - print("All test results: " + "\n\n" + catresults) - elif (len(target_category) > 0): - if (target_category == "flower") and args.device == None: - print("Please specify a NIC device (-d) to run category flower") - exit(1) - if (target_category not in ucat): - print("Specified category is not present in this file.") - exit(1) - else: - catresults = test_runner(testcases[target_category], args) - print("Category " + target_category + "\n\n" + catresults) - - ns_destroy() + if len(alltests): + catresults = test_runner(pm, args, alltests) + else: + catresults = 'No tests found\n' + if args.notap: + print('Tap output suppression requested\n') + else: + print('All test results: \n\n{}'.format(catresults)) def main(): """ @@ -419,10 +638,15 @@ def main(): """ parser = args_parse() parser = set_args(parser) + pm = PluginMgr(parser) + parser = pm.call_add_args(parser) (args, remaining) = parser.parse_known_args() - check_default_settings(args) + args.NAMES = NAMES + check_default_settings(args, remaining, pm) + if args.verbose > 2: + print('args is {}'.format(args)) - set_operation_mode(args) + set_operation_mode(pm, args) exit(0) diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py index 707c6bfef689..52fa539dc662 100755 --- a/tools/testing/selftests/tc-testing/tdc_batch.py +++ b/tools/testing/selftests/tc-testing/tdc_batch.py @@ -49,13 +49,13 @@ index = 0 for i in range(0x100): for j in range(0x100): for k in range(0x100): - mac = ("%02x:%02x:%02x" % (i, j, k)) + mac = ("{:02x}:{:02x}:{:02x}".format(i, j, k)) src_mac = "e4:11:00:" + mac dst_mac = "e4:12:00:" + mac - cmd = ("filter add dev %s %s protocol ip parent ffff: flower %s " - "src_mac %s dst_mac %s action drop %s" % + cmd = ("filter add dev {} {} protocol ip parent ffff: flower {} " + "src_mac {} dst_mac {} action drop {}".format (device, prio, skip, src_mac, dst_mac, share_action)) - file.write("%s\n" % cmd) + file.write("{}\n".format(cmd)) index += 1 if index >= number: file.close() diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py index db381120a566..9f35c96c88a0 100644 --- a/tools/testing/selftests/tc-testing/tdc_helper.py +++ b/tools/testing/selftests/tc-testing/tdc_helper.py @@ -57,20 +57,11 @@ def print_sll(items): def print_test_case(tcase): """ Pretty-printing of a given test case. """ + print('\n==============\nTest {}\t{}\n'.format(tcase['id'], tcase['name'])) for k in tcase.keys(): if (isinstance(tcase[k], list)): print(k + ":") print_list(tcase[k]) else: - print(k + ": " + tcase[k]) - - -def show_test_case_by_id(testlist, caseID): - """ Find the specified test case to pretty-print. """ - if not any(d.get('id', None) == caseID for d in testlist): - print("That ID does not exist.") - exit(1) - else: - print_test_case(next((d for d in testlist if d['id'] == caseID))) - - + if not ((k == 'id') or (k == 'name')): + print(k + ": " + str(tcase[k])) diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 3d5a62ff7d31..f5d7a7851e21 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +include ../lib.mk + ifndef CROSS_COMPILE CFLAGS := -std=gnu99 CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector @@ -6,16 +8,14 @@ ifeq ($(CONFIG_X86_32),y) LDLIBS += -lgcc_s endif -TEST_PROGS := vdso_test vdso_standalone_test_x86 +TEST_PROGS := $(OUTPUT)/vdso_test $(OUTPUT)/vdso_standalone_test_x86 all: $(TEST_PROGS) -vdso_test: parse_vdso.c vdso_test.c -vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c +$(OUTPUT)/vdso_test: parse_vdso.c vdso_test.c +$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \ vdso_standalone_test_x86.c parse_vdso.c \ - -o vdso_standalone_test_x86 + -o $@ -include ../lib.mk -clean: - rm -fr $(TEST_PROGS) +EXTRA_CLEAN := $(TEST_PROGS) endif diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 1ca2ee4d15b9..342c7bc9dc8c 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -10,3 +10,5 @@ userfaultfd mlock-intersect-test mlock-random-test virtual_address_range +gup_benchmark +va_128TBswitch diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 7f45806bd863..fdefa2295ddc 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -8,17 +8,18 @@ endif CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) LDLIBS = -lrt TEST_GEN_FILES = compaction_test +TEST_GEN_FILES += gup_benchmark TEST_GEN_FILES += hugepage-mmap TEST_GEN_FILES += hugepage-shm TEST_GEN_FILES += map_hugetlb +TEST_GEN_FILES += mlock-random-test TEST_GEN_FILES += mlock2-tests TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd -TEST_GEN_FILES += mlock-random-test +TEST_GEN_FILES += va_128TBswitch TEST_GEN_FILES += virtual_address_range -TEST_GEN_FILES += gup_benchmark TEST_PROGS := run_vmtests diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c index a65b016d4c13..1097f04e4d80 100644 --- a/tools/testing/selftests/vm/compaction_test.c +++ b/tools/testing/selftests/vm/compaction_test.c @@ -137,6 +137,8 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) printf("No of huge pages allocated = %d\n", (atoi(nr_hugepages))); + lseek(fd, 0, SEEK_SET); + if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages)) != strlen(initial_nr_hugepages)) { perror("Failed to write value to /proc/sys/vm/nr_hugepages\n"); diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index cc826326de87..22d564673830 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -2,25 +2,33 @@ # SPDX-License-Identifier: GPL-2.0 #please run as root -#we need 256M, below is the size in kB -needmem=262144 mnt=./huge exitcode=0 -#get pagesize and freepages from /proc/meminfo +#get huge pagesize and freepages from /proc/meminfo while read name size unit; do if [ "$name" = "HugePages_Free:" ]; then freepgs=$size fi if [ "$name" = "Hugepagesize:" ]; then - pgsize=$size + hpgsize_KB=$size fi done < /proc/meminfo +# Simple hugetlbfs tests have a hardcoded minimum requirement of +# huge pages totaling 256MB (262144KB) in size. The userfaultfd +# hugetlb test requires a minimum of 2 * nr_cpus huge pages. Take +# both of these requirements into account and attempt to increase +# number of huge pages available. +nr_cpus=$(nproc) +hpgsize_MB=$((hpgsize_KB / 1024)) +half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128)) +needmem_KB=$((half_ufd_size_MB * 2 * 1024)) + #set proper nr_hugepages -if [ -n "$freepgs" ] && [ -n "$pgsize" ]; then +if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then nr_hugepgs=`cat /proc/sys/vm/nr_hugepages` - needpgs=`expr $needmem / $pgsize` + needpgs=$((needmem_KB / hpgsize_KB)) tries=2 while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do lackpgs=$(( $needpgs - $freepgs )) @@ -107,8 +115,9 @@ fi echo "---------------------------" echo "running userfaultfd_hugetlb" echo "---------------------------" -# 256MB total huge pages == 128MB src and 128MB dst -./userfaultfd hugetlb 128 32 $mnt/ufd_test_file +# Test requires source and destination huge pages. Size of source +# (half_ufd_size_MB) is passed as argument to test. +./userfaultfd hugetlb $half_ufd_size_MB 32 $mnt/ufd_test_file if [ $? -ne 0 ]; then echo "[FAIL]" exitcode=1 @@ -177,4 +186,15 @@ else echo "[PASS]" fi +echo "-----------------------------" +echo "running virtual address 128TB switch test" +echo "-----------------------------" +./va_128TBswitch +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + exit $exitcode diff --git a/tools/testing/selftests/vm/va_128TBswitch.c b/tools/testing/selftests/vm/va_128TBswitch.c new file mode 100644 index 000000000000..e7fe734c374f --- /dev/null +++ b/tools/testing/selftests/vm/va_128TBswitch.c @@ -0,0 +1,297 @@ +/* + * + * Authors: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> + * Authors: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#include <stdio.h> +#include <sys/mman.h> +#include <string.h> + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +#ifdef __powerpc64__ +#define PAGE_SIZE (64 << 10) +/* + * This will work with 16M and 2M hugepage size + */ +#define HUGETLB_SIZE (16 << 20) +#else +#define PAGE_SIZE (4 << 10) +#define HUGETLB_SIZE (2 << 20) +#endif + +/* + * >= 128TB is the hint addr value we used to select + * large address space. + */ +#define ADDR_SWITCH_HINT (1UL << 47) +#define LOW_ADDR ((void *) (1UL << 30)) +#define HIGH_ADDR ((void *) (1UL << 48)) + +struct testcase { + void *addr; + unsigned long size; + unsigned long flags; + const char *msg; + unsigned int low_addr_required:1; + unsigned int keep_mapped:1; +}; + +static struct testcase testcases[] = { + { + /* + * If stack is moved, we could possibly allocate + * this at the requested address. + */ + .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), + .size = PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)", + .low_addr_required = 1, + }, + { + /* + * We should never allocate at the requested address or above it + * The len cross the 128TB boundary. Without MAP_FIXED + * we will always search in the lower address space. + */ + .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, (2 * PAGE_SIZE))", + .low_addr_required = 1, + }, + { + /* + * Exact mapping at 128TB, the area is free we should get that + * even without MAP_FIXED. + */ + .addr = ((void *)(ADDR_SWITCH_HINT)), + .size = PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)", + .keep_mapped = 1, + }, + { + .addr = (void *)(ADDR_SWITCH_HINT), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)", + }, + { + .addr = NULL, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(NULL)", + .low_addr_required = 1, + }, + { + .addr = LOW_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(LOW_ADDR)", + .low_addr_required = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR)", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR) again", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(HIGH_ADDR, MAP_FIXED)", + }, + { + .addr = (void *) -1, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1)", + .keep_mapped = 1, + }, + { + .addr = (void *) -1, + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1) again", + }, + { + .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), + .size = PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)", + .low_addr_required = 1, + }, + { + .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2 * PAGE_SIZE)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE / 2), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE/2 , 2 * PAGE_SIZE)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = ((void *)(ADDR_SWITCH_HINT)), + .size = PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)", + }, + { + .addr = (void *)(ADDR_SWITCH_HINT), + .size = 2 * PAGE_SIZE, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)", + }, +}; + +static struct testcase hugetlb_testcases[] = { + { + .addr = NULL, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(NULL, MAP_HUGETLB)", + .low_addr_required = 1, + }, + { + .addr = LOW_ADDR, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(LOW_ADDR, MAP_HUGETLB)", + .low_addr_required = 1, + }, + { + .addr = HIGH_ADDR, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR, MAP_HUGETLB)", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again", + .keep_mapped = 1, + }, + { + .addr = HIGH_ADDR, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)", + }, + { + .addr = (void *) -1, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1, MAP_HUGETLB)", + .keep_mapped = 1, + }, + { + .addr = (void *) -1, + .size = HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1, MAP_HUGETLB) again", + }, + { + .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE), + .size = 2 * HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2*HUGETLB_SIZE, MAP_HUGETLB)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = (void *)(ADDR_SWITCH_HINT), + .size = 2 * HUGETLB_SIZE, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(ADDR_SWITCH_HINT , 2*HUGETLB_SIZE, MAP_FIXED | MAP_HUGETLB)", + }, +}; + +static int run_test(struct testcase *test, int count) +{ + void *p; + int i, ret = 0; + + for (i = 0; i < count; i++) { + struct testcase *t = test + i; + + p = mmap(t->addr, t->size, PROT_READ | PROT_WRITE, t->flags, -1, 0); + + printf("%s: %p - ", t->msg, p); + + if (p == MAP_FAILED) { + printf("FAILED\n"); + ret = 1; + continue; + } + + if (t->low_addr_required && p >= (void *)(ADDR_SWITCH_HINT)) { + printf("FAILED\n"); + ret = 1; + } else { + /* + * Do a dereference of the address returned so that we catch + * bugs in page fault handling + */ + memset(p, 0, t->size); + printf("OK\n"); + } + if (!t->keep_mapped) + munmap(p, t->size); + } + + return ret; +} + +static int supported_arch(void) +{ +#if defined(__powerpc64__) + return 1; +#elif defined(__x86_64__) + return 1; +#else + return 0; +#endif +} + +int main(int argc, char **argv) +{ + int ret; + + if (!supported_arch()) + return 0; + + ret = run_test(testcases, ARRAY_SIZE(testcases)); + if (argc == 2 && !strcmp(argv[1], "--run-hugetlb")) + ret = run_test(hugetlb_testcases, ARRAY_SIZE(hugetlb_testcases)); + return ret; +} diff --git a/tools/testing/selftests/x86/5lvl.c b/tools/testing/selftests/x86/5lvl.c deleted file mode 100644 index 2eafdcd4c2b3..000000000000 --- a/tools/testing/selftests/x86/5lvl.c +++ /dev/null @@ -1,177 +0,0 @@ -#include <stdio.h> -#include <sys/mman.h> - -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - -#define PAGE_SIZE 4096 -#define LOW_ADDR ((void *) (1UL << 30)) -#define HIGH_ADDR ((void *) (1UL << 50)) - -struct testcase { - void *addr; - unsigned long size; - unsigned long flags; - const char *msg; - unsigned int low_addr_required:1; - unsigned int keep_mapped:1; -}; - -static struct testcase testcases[] = { - { - .addr = NULL, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(NULL)", - .low_addr_required = 1, - }, - { - .addr = LOW_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(LOW_ADDR)", - .low_addr_required = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR)", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR) again", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(HIGH_ADDR, MAP_FIXED)", - }, - { - .addr = (void*) -1, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1)", - .keep_mapped = 1, - }, - { - .addr = (void*) -1, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1) again", - }, - { - .addr = (void *)((1UL << 47) - PAGE_SIZE), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap((1UL << 47), 2 * PAGE_SIZE)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = (void *)((1UL << 47) - PAGE_SIZE / 2), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap((1UL << 47), 2 * PAGE_SIZE / 2)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = (void *)((1UL << 47) - PAGE_SIZE), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap((1UL << 47) - PAGE_SIZE, 2 * PAGE_SIZE, MAP_FIXED)", - }, - { - .addr = NULL, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(NULL, MAP_HUGETLB)", - .low_addr_required = 1, - }, - { - .addr = LOW_ADDR, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(LOW_ADDR, MAP_HUGETLB)", - .low_addr_required = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR, MAP_HUGETLB)", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)", - }, - { - .addr = (void*) -1, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1, MAP_HUGETLB)", - .keep_mapped = 1, - }, - { - .addr = (void*) -1, - .size = 2UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1, MAP_HUGETLB) again", - }, - { - .addr = (void *)((1UL << 47) - PAGE_SIZE), - .size = 4UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap((1UL << 47), 4UL << 20, MAP_HUGETLB)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = (void *)((1UL << 47) - (2UL << 20)), - .size = 4UL << 20, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap((1UL << 47) - (2UL << 20), 4UL << 20, MAP_FIXED | MAP_HUGETLB)", - }, -}; - -int main(int argc, char **argv) -{ - int i; - void *p; - - for (i = 0; i < ARRAY_SIZE(testcases); i++) { - struct testcase *t = testcases + i; - - p = mmap(t->addr, t->size, PROT_NONE, t->flags, -1, 0); - - printf("%s: %p - ", t->msg, p); - - if (p == MAP_FAILED) { - printf("FAILED\n"); - continue; - } - - if (t->low_addr_required && p >= (void *)(1UL << 47)) - printf("FAILED\n"); - else - printf("OK\n"); - if (!t->keep_mapped) - munmap(p, t->size); - } - return 0; -} diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 5d4f10ac2af2..d744991c0f4f 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -5,16 +5,26 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean -TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ - check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \ +UNAME_M := $(shell uname -m) +CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) +CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) + +TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ + check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ protection_keys test_vdso test_vsyscall TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer -TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip 5lvl +TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip +# Some selftests require 32bit support enabled also on 64bit systems +TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall -TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) +TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) $(TARGETS_C_32BIT_NEEDED) TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY) +ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),11) +TARGETS_C_64BIT_ALL += $(TARGETS_C_32BIT_NEEDED) +endif + BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64) @@ -23,18 +33,28 @@ BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie -UNAME_M := $(shell uname -m) -CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) -CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) +define gen-target-rule-32 +$(1) $(1)_32: $(OUTPUT)/$(1)_32 +.PHONY: $(1) $(1)_32 +endef + +define gen-target-rule-64 +$(1) $(1)_64: $(OUTPUT)/$(1)_64 +.PHONY: $(1) $(1)_64 +endef ifeq ($(CAN_BUILD_I386),1) all: all_32 TEST_PROGS += $(BINARIES_32) +EXTRA_CFLAGS += -DCAN_BUILD_32 +$(foreach t,$(TARGETS_C_32BIT_ALL),$(eval $(call gen-target-rule-32,$(t)))) endif ifeq ($(CAN_BUILD_X86_64),1) all: all_64 TEST_PROGS += $(BINARIES_64) +EXTRA_CFLAGS += -DCAN_BUILD_64 +$(foreach t,$(TARGETS_C_64BIT_ALL),$(eval $(call gen-target-rule-64,$(t)))) endif all_32: $(BINARIES_32) diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c index 361466a2eaef..ade443a88421 100644 --- a/tools/testing/selftests/x86/entry_from_vm86.c +++ b/tools/testing/selftests/x86/entry_from_vm86.c @@ -95,6 +95,10 @@ asm ( "int3\n\t" "vmcode_int80:\n\t" "int $0x80\n\t" + "vmcode_popf_hlt:\n\t" + "push %ax\n\t" + "popf\n\t" + "hlt\n\t" "vmcode_umip:\n\t" /* addressing via displacements */ "smsw (2052)\n\t" @@ -124,8 +128,8 @@ asm ( extern unsigned char vmcode[], end_vmcode[]; extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[], - vmcode_sti[], vmcode_int3[], vmcode_int80[], vmcode_umip[], - vmcode_umip_str[], vmcode_umip_sldt[]; + vmcode_sti[], vmcode_int3[], vmcode_int80[], vmcode_popf_hlt[], + vmcode_umip[], vmcode_umip_str[], vmcode_umip_sldt[]; /* Returns false if the test was skipped. */ static bool do_test(struct vm86plus_struct *v86, unsigned long eip, @@ -175,7 +179,7 @@ static bool do_test(struct vm86plus_struct *v86, unsigned long eip, (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) { printf("[OK]\tReturned correctly\n"); } else { - printf("[FAIL]\tIncorrect return reason\n"); + printf("[FAIL]\tIncorrect return reason (started at eip = 0x%lx, ended at eip = 0x%lx)\n", eip, v86->regs.eip); nerrs++; } @@ -264,6 +268,9 @@ int main(void) v86.regs.ds = load_addr / 16; v86.regs.es = load_addr / 16; + /* Use the end of the page as our stack. */ + v86.regs.esp = 4096; + assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */ /* #BR -- should deliver SIG??? */ @@ -295,6 +302,23 @@ int main(void) v86.regs.eflags &= ~X86_EFLAGS_IF; do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set"); + /* POPF with VIP set but IF clear: should not trap */ + v86.regs.eflags = X86_EFLAGS_VIP; + v86.regs.eax = 0; + do_test(&v86, vmcode_popf_hlt - vmcode, VM86_UNKNOWN, 0, "POPF with VIP set and IF clear"); + + /* POPF with VIP set and IF set: should trap */ + v86.regs.eflags = X86_EFLAGS_VIP; + v86.regs.eax = X86_EFLAGS_IF; + do_test(&v86, vmcode_popf_hlt - vmcode, VM86_STI, 0, "POPF with VIP and IF set"); + + /* POPF with VIP clear and IF set: should not trap */ + v86.regs.eflags = 0; + v86.regs.eax = X86_EFLAGS_IF; + do_test(&v86, vmcode_popf_hlt - vmcode, VM86_UNKNOWN, 0, "POPF with VIP clear and IF set"); + + v86.regs.eflags = 0; + /* INT3 -- should cause #BP */ do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3"); @@ -318,7 +342,7 @@ int main(void) clearhandler(SIGSEGV); /* Make sure nothing explodes if we fork. */ - if (fork() > 0) + if (fork() == 0) return 0; return (nerrs == 0 ? 0 : 1); diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index ec0f6b45ce8b..9c0325e1ea68 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -315,11 +315,39 @@ static inline void *__si_bounds_upper(siginfo_t *si) return si->si_upper; } #else + +/* + * This deals with old version of _sigfault in some distros: + * + +old _sigfault: + struct { + void *si_addr; + } _sigfault; + +new _sigfault: + struct { + void __user *_addr; + int _trapno; + short _addr_lsb; + union { + struct { + void __user *_lower; + void __user *_upper; + } _addr_bnd; + __u32 _pkey; + }; + } _sigfault; + * + */ + static inline void **__si_bounds_hack(siginfo_t *si) { void *sigfault = &si->_sifields._sigfault; void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault); - void **__si_lower = end_sigfault; + int *trapno = (int*)end_sigfault; + /* skip _trapno and _addr_lsb */ + void **__si_lower = (void**)(trapno + 2); return __si_lower; } @@ -331,7 +359,7 @@ static inline void *__si_bounds_lower(siginfo_t *si) static inline void *__si_bounds_upper(siginfo_t *si) { - return (*__si_bounds_hack(si)) + sizeof(void *); + return *(__si_bounds_hack(si) + 1); } #endif diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index bc1b0735bb50..f15aa5a76fe3 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -393,34 +393,6 @@ pid_t fork_lazy_child(void) return forkret; } -void davecmp(void *_a, void *_b, int len) -{ - int i; - unsigned long *a = _a; - unsigned long *b = _b; - - for (i = 0; i < len / sizeof(*a); i++) { - if (a[i] == b[i]) - continue; - - dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]); - } -} - -void dumpit(char *f) -{ - int fd = open(f, O_RDONLY); - char buf[100]; - int nr_read; - - dprintf2("maps fd: %d\n", fd); - do { - nr_read = read(fd, &buf[0], sizeof(buf)); - write(1, buf, nr_read); - } while (nr_read > 0); - close(fd); -} - #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index 1ae1c5a7392e..6f22238f3217 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c @@ -183,8 +183,10 @@ static void test_ptrace_syscall_restart(void) if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0) err(1, "PTRACE_TRACEME"); + pid_t pid = getpid(), tid = syscall(SYS_gettid); + printf("\tChild will make one syscall\n"); - raise(SIGSTOP); + syscall(SYS_tgkill, pid, tid, SIGSTOP); syscall(SYS_gettid, 10, 11, 12, 13, 14, 15); _exit(0); @@ -301,9 +303,11 @@ static void test_restart_under_ptrace(void) if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0) err(1, "PTRACE_TRACEME"); + pid_t pid = getpid(), tid = syscall(SYS_gettid); + printf("\tChild will take a nap until signaled\n"); setsigign(SIGUSR1, SA_RESTART); - raise(SIGSTOP); + syscall(SYS_tgkill, pid, tid, SIGSTOP); syscall(SYS_pause, 0, 0, 0, 0, 0, 0); _exit(0); diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index a48da95c18fd..ddfdd635de16 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -119,7 +119,9 @@ static void check_result(void) int main() { +#ifdef CAN_BUILD_32 int tmp; +#endif sethandler(SIGTRAP, sigtrap, 0); @@ -139,12 +141,13 @@ int main() : : "c" (post_nop) : "r11"); check_result(); #endif - +#ifdef CAN_BUILD_32 printf("[RUN]\tSet TF and check int80\n"); set_eflags(get_eflags() | X86_EFLAGS_TF); asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid) : INT80_CLOBBERS); check_result(); +#endif /* * This test is particularly interesting if fast syscalls use diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c index bf0d687c7db7..64f11c8d9b76 100644 --- a/tools/testing/selftests/x86/test_mremap_vdso.c +++ b/tools/testing/selftests/x86/test_mremap_vdso.c @@ -90,8 +90,12 @@ int main(int argc, char **argv, char **envp) vdso_size += PAGE_SIZE; } +#ifdef __i386__ /* Glibc is likely to explode now - exit with raw syscall */ asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret)); +#else /* __x86_64__ */ + syscall(SYS_exit, ret); +#endif } else { int status; diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c index 29973cde06d3..235259011704 100644 --- a/tools/testing/selftests/x86/test_vdso.c +++ b/tools/testing/selftests/x86/test_vdso.c @@ -26,20 +26,59 @@ # endif #endif +/* max length of lines in /proc/self/maps - anything longer is skipped here */ +#define MAPS_LINE_LEN 128 + int nerrs = 0; +typedef long (*getcpu_t)(unsigned *, unsigned *, void *); + +getcpu_t vgetcpu; +getcpu_t vdso_getcpu; + +static void *vsyscall_getcpu(void) +{ #ifdef __x86_64__ -# define VSYS(x) (x) + FILE *maps; + char line[MAPS_LINE_LEN]; + bool found = false; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */ + return NULL; + + while (fgets(line, MAPS_LINE_LEN, maps)) { + char r, x; + void *start, *end; + char name[MAPS_LINE_LEN]; + + /* sscanf() is safe here as strlen(name) >= strlen(line) */ + if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", + &start, &end, &r, &x, name) != 5) + continue; + + if (strcmp(name, "[vsyscall]")) + continue; + + /* assume entries are OK, as we test vDSO here not vsyscall */ + found = true; + break; + } + + fclose(maps); + + if (!found) { + printf("Warning: failed to find vsyscall getcpu\n"); + return NULL; + } + return (void *) (0xffffffffff600800); #else -# define VSYS(x) 0 + return NULL; #endif +} -typedef long (*getcpu_t)(unsigned *, unsigned *, void *); - -const getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800); -getcpu_t vdso_getcpu; -void fill_function_pointers() +static void fill_function_pointers() { void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); @@ -54,6 +93,8 @@ void fill_function_pointers() vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); if (!vdso_getcpu) printf("Warning: failed to find getcpu in vDSO\n"); + + vgetcpu = (getcpu_t) vsyscall_getcpu(); } static long sys_getcpu(unsigned * cpu, unsigned * node, diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index 7a744fa7b786..0b4f1cc2291c 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -33,6 +33,9 @@ # endif #endif +/* max length of lines in /proc/self/maps - anything longer is skipped here */ +#define MAPS_LINE_LEN 128 + static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) { @@ -98,7 +101,7 @@ static int init_vsys(void) #ifdef __x86_64__ int nerrs = 0; FILE *maps; - char line[128]; + char line[MAPS_LINE_LEN]; bool found = false; maps = fopen("/proc/self/maps", "r"); @@ -108,10 +111,12 @@ static int init_vsys(void) return 0; } - while (fgets(line, sizeof(line), maps)) { + while (fgets(line, MAPS_LINE_LEN, maps)) { char r, x; void *start, *end; - char name[128]; + char name[MAPS_LINE_LEN]; + + /* sscanf() is safe here as strlen(name) >= strlen(line) */ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", &start, &end, &r, &x, name) != 5) continue; @@ -445,7 +450,7 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void) num_vsyscall_traps++; } -static int test_native_vsyscall(void) +static int test_emulation(void) { time_t tmp; bool is_native; @@ -453,7 +458,7 @@ static int test_native_vsyscall(void) if (!vtime) return 0; - printf("[RUN]\tchecking for native vsyscall\n"); + printf("[RUN]\tchecking that vsyscalls are emulated\n"); sethandler(SIGTRAP, sigtrap, 0); set_eflags(get_eflags() | X86_EFLAGS_TF); vtime(&tmp); @@ -469,11 +474,12 @@ static int test_native_vsyscall(void) */ is_native = (num_vsyscall_traps > 1); - printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n", + printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n", + (is_native ? "FAIL" : "OK"), (is_native ? "native" : "emulated"), (int)num_vsyscall_traps); - return 0; + return is_native; } #endif @@ -493,7 +499,7 @@ int main(int argc, char **argv) nerrs += test_vsys_r(); #ifdef __x86_64__ - nerrs += test_native_vsyscall(); + nerrs += test_emulation(); #endif return nerrs ? 1 : 0; |