diff options
author | Eric Christopher <echristo@gmail.com> | 2019-04-17 04:52:47 +0000 |
---|---|---|
committer | Eric Christopher <echristo@gmail.com> | 2019-04-17 04:52:47 +0000 |
commit | cee313d288a4faf0355d76fb6e0e927e211d08a5 (patch) | |
tree | d386075318d761197779a96e5d8fc0dc7b06342b /llvm/test/Transforms/SampleProfile | |
parent | c3d6a929fdd92fd06d4304675ade8d7210ee711a (diff) | |
download | bcm5719-llvm-cee313d288a4faf0355d76fb6e0e927e211d08a5.tar.gz bcm5719-llvm-cee313d288a4faf0355d76fb6e0e927e211d08a5.zip |
Revert "Temporarily Revert "Add basic loop fusion pass.""
The reversion apparently deleted the test/Transforms directory.
Will be re-reverting again.
llvm-svn: 358552
Diffstat (limited to 'llvm/test/Transforms/SampleProfile')
71 files changed, 3398 insertions, 0 deletions
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof b/llvm/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof new file mode 100644 index 00000000000..30e26cc8184 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof @@ -0,0 +1,2 @@ +empty:100:0 + 1.-3: 10 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof b/llvm/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof new file mode 100644 index 00000000000..62227746655 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof @@ -0,0 +1,3 @@ +3empty:100:BAD + 0: 0 + 1: 100 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/bad_line_values.prof b/llvm/test/Transforms/SampleProfile/Inputs/bad_line_values.prof new file mode 100644 index 00000000000..61ba7c01591 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/bad_line_values.prof @@ -0,0 +1,2 @@ +empty:100:0 +-1: 10 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/bad_mangle.prof b/llvm/test/Transforms/SampleProfile/Inputs/bad_mangle.prof new file mode 100644 index 00000000000..33b4c42cab4 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/bad_mangle.prof @@ -0,0 +1,3 @@ +double convert<std::string, float>(float):2909472:181842 + 0: 181842 + 1: 181842 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof b/llvm/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof new file mode 100644 index 00000000000..608affa3ff9 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof @@ -0,0 +1,3 @@ +empty:100:0 + 0: 0 + 1: BAD diff --git a/llvm/test/Transforms/SampleProfile/Inputs/bad_samples.prof b/llvm/test/Transforms/SampleProfile/Inputs/bad_samples.prof new file mode 100644 index 00000000000..bce7db9708d --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/bad_samples.prof @@ -0,0 +1,2 @@ +empty:100:0 + 1.3: -10 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/branch.prof b/llvm/test/Transforms/SampleProfile/Inputs/branch.prof new file mode 100644 index 00000000000..035af631a7a --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/branch.prof @@ -0,0 +1,10 @@ +main:15680:2500 + 1: 2500 + 4: 1000 + 5: 1000 + 6: 800 + 7: 500 + 9: 10226 + 10: 2243 + 16: 0 + 18: 0 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/calls.prof b/llvm/test/Transforms/SampleProfile/Inputs/calls.prof new file mode 100644 index 00000000000..be64a1ead42 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/calls.prof @@ -0,0 +1,10 @@ +_Z3sumii:105580:5279 + 0: 5279 + 1: 5279 + 2: 5279 +main:225715:0 + 2.1: 5553 + 3: 5391 + # This indicates that at line 3 of this function, the 'then' branch + # of the conditional is taken (discriminator '1'). + 3.1: 5752 _Z3sumii:5860 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/cold-indirect-call.prof b/llvm/test/Transforms/SampleProfile/Inputs/cold-indirect-call.prof new file mode 100644 index 00000000000..636ed7e0068 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/cold-indirect-call.prof @@ -0,0 +1,6 @@ +foo:5000:1 + 1: 2000 quz:1000 + 1: bar:3000 + 1: 3000 + 1: baz:0 + 1: 0 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/cov-zero-samples.prof b/llvm/test/Transforms/SampleProfile/Inputs/cov-zero-samples.prof new file mode 100644 index 00000000000..528e42ca388 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/cov-zero-samples.prof @@ -0,0 +1,10 @@ +main:20111403:0 + 2.1: 404065 + 3: 443089 + 3.1: 0 + 4: 404066 + 6: 0 + 7: 0 + 3.1: _Z12never_calledi:0 + 0: 0 + 1: 0 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/coverage-warning.prof b/llvm/test/Transforms/SampleProfile/Inputs/coverage-warning.prof new file mode 100644 index 00000000000..57989b837a0 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/coverage-warning.prof @@ -0,0 +1,5 @@ +foo:30000:100 + 2: 28000 + 3: 1000 +# This profile is stale. Function foo() does not have a line 8 anymore. + 8: 1700 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/discriminator.prof b/llvm/test/Transforms/SampleProfile/Inputs/discriminator.prof new file mode 100644 index 00000000000..0c2561d725c --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/discriminator.prof @@ -0,0 +1,8 @@ +foo:1000:0 + 1: 1 + 2: 1 + 2.1: 100 + 3: 100 + 3.1: 5 + 4: 100 + 5: 1 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/einline.prof b/llvm/test/Transforms/SampleProfile/Inputs/einline.prof new file mode 100644 index 00000000000..624990b47ef --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/einline.prof @@ -0,0 +1,7 @@ +_Z3foov:200:100 + 1: _Z3barv:0 + 2: no_inline:100 + 3: _Z3barv:100 +recursive:200:100 + 1: recursive:100 + 2: recursive:100 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/entry_counts.prof b/llvm/test/Transforms/SampleProfile/Inputs/entry_counts.prof new file mode 100644 index 00000000000..95addc9f7a1 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/entry_counts.prof @@ -0,0 +1,3 @@ +empty:100:13293 + 0: 0 + 1: 100 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/entry_counts_cold.prof b/llvm/test/Transforms/SampleProfile/Inputs/entry_counts_cold.prof new file mode 100644 index 00000000000..cd7e871d6c2 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/entry_counts_cold.prof @@ -0,0 +1,20 @@ +top:200:100 + 1: 100 foo:100 + 2: 100 + 3: 2 + 4: 100 + 1: foo:100 + 2: 100 + 3: 100 bar:100 + 4: 100 + 3: bar:2 + 1: 2 + 2: 2 +foo:200:150 + 2: 150 + 3: 150 bar:150 + 4: 150 +bar:450:300 + 1: 300 baz:300 + 2: 300 + 3: 300 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/flattened.prof b/llvm/test/Transforms/SampleProfile/Inputs/flattened.prof new file mode 100644 index 00000000000..962bc6e58e5 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/flattened.prof @@ -0,0 +1,2 @@ +foo:100:100 + 1: 100 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/fnptr.binprof b/llvm/test/Transforms/SampleProfile/Inputs/fnptr.binprof Binary files differnew file mode 100644 index 00000000000..71934365218 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/fnptr.binprof diff --git a/llvm/test/Transforms/SampleProfile/Inputs/fnptr.prof b/llvm/test/Transforms/SampleProfile/Inputs/fnptr.prof new file mode 100644 index 00000000000..01680d846d8 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/fnptr.prof @@ -0,0 +1,13 @@ +_Z3fooi:7711:610 + 1: 610 +_Z3bari:20301:1437 + 1: 1437 +main:184019:0 + 3: 0 + 4: 534 + 6: 2080 + 9: 2064 _Z3bari:1471 _Z3fooi:631 + 5.1: 1075 + 5: 1075 + 7: 534 + 4.2: 534 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/function_metadata.compact.afdo b/llvm/test/Transforms/SampleProfile/Inputs/function_metadata.compact.afdo Binary files differnew file mode 100644 index 00000000000..20bd8967077 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/function_metadata.compact.afdo diff --git a/llvm/test/Transforms/SampleProfile/Inputs/function_metadata.prof b/llvm/test/Transforms/SampleProfile/Inputs/function_metadata.prof new file mode 100644 index 00000000000..621bed722b7 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/function_metadata.prof @@ -0,0 +1,17 @@ +test:3200:0 + 1: 100 + 2: 100 + 3: foo:1000 + 1: 800 + 3: bar:200 + 2: 190 + 4: baz:10 + 2: 10 + 4: foo1:1000 + 1: 1000 + 4: foo2:1000 + 1: 1000 foo3:1000 +test_liveness:1000:0 + 1: foo:1000 + 1: foo_available:1000 + 2: 1000 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/gcc-simple.afdo b/llvm/test/Transforms/SampleProfile/Inputs/gcc-simple.afdo Binary files differnew file mode 100644 index 00000000000..93f22ce3053 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/gcc-simple.afdo diff --git a/llvm/test/Transforms/SampleProfile/Inputs/indirect-call.afdo b/llvm/test/Transforms/SampleProfile/Inputs/indirect-call.afdo Binary files differnew file mode 100644 index 00000000000..2d5b345e960 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/indirect-call.afdo diff --git a/llvm/test/Transforms/SampleProfile/Inputs/indirect-call.compact.afdo b/llvm/test/Transforms/SampleProfile/Inputs/indirect-call.compact.afdo Binary files differnew file mode 100644 index 00000000000..579f03c8515 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/indirect-call.compact.afdo diff --git a/llvm/test/Transforms/SampleProfile/Inputs/indirect-call.prof b/llvm/test/Transforms/SampleProfile/Inputs/indirect-call.prof new file mode 100644 index 00000000000..5cbfc0a73bc --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/indirect-call.prof @@ -0,0 +1,31 @@ +test:63067:0 + 1: 3345 _Z3barv:1398 _Z3foov:2059 +test_inline:3000:0 + 1: 1000 foo_inline3:1000 + 1: foo_inline1:3000 + 11: 3000 + 1: foo_inline2:4000 + 19: 4000 +test_noinline:3000:0 + 1: foo_noinline:3000 + 20: 3000 +test_direct:3000:0 + 1: foo_direct:3000 + 21: 3000 +test_inline_strip:3000:0 + 1: foo_inline_strip:3000 + 1: 3000 +test_inline_strip_conflict:3000:0 + 1: foo_inline_strip_conflict:3000 + 1: 3000 +test_norecursive_inline:3000:0 + 1: test_norecursive_inline:3000 + 20: 3000 +test_noinline_bitcast:3000:0 + 1: foo_direct_i32:3000 + 1: 3000 +return_arg_caller:3000:0 + 1: foo_inline1:3000 + 11: 3000 + 2: return_arg:3000 + 1: 3000 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-act.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-act.prof new file mode 100644 index 00000000000..655739f3788 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-act.prof @@ -0,0 +1,3 @@ +_Z3bari:100:0 + 1: _Z3fooi:100 + 2: 100 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-combine.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-combine.prof new file mode 100644 index 00000000000..8d1c0b8103c --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-combine.prof @@ -0,0 +1,2 @@ +foo:1000:1000 + 1: bar:1000 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-coverage.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-coverage.prof new file mode 100644 index 00000000000..6f38a1ca8c7 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-coverage.prof @@ -0,0 +1,7 @@ +main:501438:0 + 2.1: 23478 + 3: 23478 + 4: 0 + 0: 0 + 3: _Z3fool:172746 + 1: 31878 rand:31878 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-hint.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-hint.prof new file mode 100644 index 00000000000..a6840346eb4 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-hint.prof @@ -0,0 +1,3 @@ +_Z6hot_fnRxi:700:0 +_Z7cold_fnRxi:1:0 +other:299:0 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline.compactbinary.afdo b/llvm/test/Transforms/SampleProfile/Inputs/inline.compactbinary.afdo Binary files differnew file mode 100644 index 00000000000..c9fde769982 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/inline.compactbinary.afdo diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline.prof new file mode 100644 index 00000000000..386cdf8a7b5 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/inline.prof @@ -0,0 +1,7 @@ +main:225715:0 + 2.1: 5553 + 3: 5391 + 3.1: _Z3sumii:5860 + 0: 5279 + 1: 5279 + 2: 5279 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/nodebug.prof b/llvm/test/Transforms/SampleProfile/Inputs/nodebug.prof new file mode 100644 index 00000000000..48596035758 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/nodebug.prof @@ -0,0 +1,2 @@ +foo:100:10 + 0: bar:10 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/nolocinfo.prof b/llvm/test/Transforms/SampleProfile/Inputs/nolocinfo.prof new file mode 100644 index 00000000000..fc69aa8ae78 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/nolocinfo.prof @@ -0,0 +1,3 @@ +foo:30000:100 + 2: 28000 + 3: 1000 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/offset.prof b/llvm/test/Transforms/SampleProfile/Inputs/offset.prof new file mode 100644 index 00000000000..b07ce3504fb --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/offset.prof @@ -0,0 +1,4 @@ +_Z3fooi:300:1 + 65532: 1000 + 65533: 10 + 65535: 990 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/propagate.prof b/llvm/test/Transforms/SampleProfile/Inputs/propagate.prof new file mode 100644 index 00000000000..f298752d03c --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/propagate.prof @@ -0,0 +1,22 @@ +_Z3fooiil:33168:0 + 0: 0 + 1: 0 + 2: 0 + 4: 0 + 4.1: 302 + 4.2: 315 + 5: 302 + 6: 200 + 7: 308 + 8: 227 + 9: 227 + 10: 227 + 11: 83 + 11.1: 7553 + 11.2: 7479 + 12: 7479 + 13: 7479 + 16: 305 + 18: 0 + 19: 0 + 65533: 308 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/remap.map b/llvm/test/Transforms/SampleProfile/Inputs/remap.map new file mode 100644 index 00000000000..df3d82d38bd --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/remap.map @@ -0,0 +1,8 @@ +# foo:: and foo::detail:: are equivalent +name 3foo N3foo6detailE + +# foo::qux and foo::quux are equivalent +type N3foo3quxE N3foo4quuxE + +# N::X and M::X are equivalent +name N1N1XE N1M1XE diff --git a/llvm/test/Transforms/SampleProfile/Inputs/remap.prof b/llvm/test/Transforms/SampleProfile/Inputs/remap.prof new file mode 100644 index 00000000000..8244a51a165 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/remap.prof @@ -0,0 +1,10 @@ +_ZN3foo3barERKN1N1XINS_4quuxEEE:15680:2500 + 1: 2500 + 4: 1000 + 5: 1000 + 6: 800 + 7: 500 + 9: 10226 + 10: 2243 + 16: 0 + 18: 0 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/remarks.prof b/llvm/test/Transforms/SampleProfile/Inputs/remarks.prof new file mode 100644 index 00000000000..1e905834cf4 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/remarks.prof @@ -0,0 +1,7 @@ +main:623868:0 + 0: 0 + 0: _Z3foov:623868 + 3: 18346 + 4: 0 + 6: 19475 + 2: 18305 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/summary.prof b/llvm/test/Transforms/SampleProfile/Inputs/summary.prof new file mode 100644 index 00000000000..e80b9bc171b --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/summary.prof @@ -0,0 +1,9 @@ +bar:100:3 + 1: 100 +foo:200:1 + 1: 200 +baz:600:1 + 1: 0 + 2: 300 + 1: bar:300 + 1: 300 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/syntax.prof b/llvm/test/Transforms/SampleProfile/Inputs/syntax.prof new file mode 100644 index 00000000000..465212d86e8 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/syntax.prof @@ -0,0 +1,3 @@ +empty:100:0 + 0: 0 + 1: 100 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/warm-inline-instance.prof b/llvm/test/Transforms/SampleProfile/Inputs/warm-inline-instance.prof new file mode 100644 index 00000000000..a1b0e27dd58 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/warm-inline-instance.prof @@ -0,0 +1,11 @@ +main:2257150:0 + 2.1: 5553 + 3: 5391 + 3.1: foo:5860 + 0: 5279 + 1: 5279 + 2: 5279 + 4.1: goo:60 + 0: 20 + 1: 20 + 2: 20 diff --git a/llvm/test/Transforms/SampleProfile/branch.ll b/llvm/test/Transforms/SampleProfile/branch.ll new file mode 100644 index 00000000000..d204e64efd8 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/branch.ll @@ -0,0 +1,242 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/branch.prof | opt -analyze -branch-prob | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/branch.prof | opt -analyze -branch-prob | FileCheck %s + +; Original C++ code for this test case: +; +; #include <stdio.h> +; #include <stdlib.h> + +; int main(int argc, char *argv[]) { +; if (argc < 2) +; return 1; +; double result; +; int limit = atoi(argv[1]); +; if (limit > 100) { +; double s = 23.041968 * atoi(argv[2]); +; for (int u = 0; u < limit; u++) { +; double x = s; +; s = x + 3.049 + (double)u; +; s -= s + 3.94 / x * 0.32; +; } +; result = s; +; } else { +; result = atoi(argv[2]); +; } +; printf("result is %lf\n", result); +; return 0; +; } + +@.str = private unnamed_addr constant [15 x i8] c"result is %lf\0A\00", align 1 + +; Function Attrs: uwtable +define i32 @main(i32 %argc, i8** %argv) #0 !dbg !6 { +; CHECK: Printing analysis 'Branch Probability Analysis' for function 'main': + +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 8 + %result = alloca double, align 8 + %limit = alloca i32, align 4 + %s = alloca double, align 8 + %u = alloca i32, align 4 + %x = alloca double, align 8 + store i32 0, i32* %retval, align 4 + store i32 %argc, i32* %argc.addr, align 4 + call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !16, metadata !17), !dbg !18 + store i8** %argv, i8*** %argv.addr, align 8 + call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !19, metadata !17), !dbg !20 + %0 = load i32, i32* %argc.addr, align 4, !dbg !21 + %cmp = icmp slt i32 %0, 2, !dbg !23 + br i1 %cmp, label %if.then, label %if.end, !dbg !24 +; CHECK: edge entry -> if.then probability is 0x4ccf6b16 / 0x80000000 = 60.01% +; CHECK: edge entry -> if.end probability is 0x333094ea / 0x80000000 = 39.99% + +if.then: ; preds = %entry + store i32 1, i32* %retval, align 4, !dbg !25 + br label %return, !dbg !25 + +if.end: ; preds = %entry + call void @llvm.dbg.declare(metadata double* %result, metadata !26, metadata !17), !dbg !27 + call void @llvm.dbg.declare(metadata i32* %limit, metadata !28, metadata !17), !dbg !29 + %1 = load i8**, i8*** %argv.addr, align 8, !dbg !30 + %arrayidx = getelementptr inbounds i8*, i8** %1, i64 1, !dbg !30 + %2 = load i8*, i8** %arrayidx, align 8, !dbg !30 + %call = call i32 @atoi(i8* %2) #4, !dbg !31 + store i32 %call, i32* %limit, align 4, !dbg !29 + %3 = load i32, i32* %limit, align 4, !dbg !32 + %cmp1 = icmp sgt i32 %3, 100, !dbg !34 + br i1 %cmp1, label %if.then.2, label %if.else, !dbg !35 +; CHECK: edge if.end -> if.then.2 probability is 0x6652c748 / 0x80000000 = 79.94% +; CHECK: edge if.end -> if.else probability is 0x19ad38b8 / 0x80000000 = 20.06% + +if.then.2: ; preds = %if.end + call void @llvm.dbg.declare(metadata double* %s, metadata !36, metadata !17), !dbg !38 + %4 = load i8**, i8*** %argv.addr, align 8, !dbg !39 + %arrayidx3 = getelementptr inbounds i8*, i8** %4, i64 2, !dbg !39 + %5 = load i8*, i8** %arrayidx3, align 8, !dbg !39 + %call4 = call i32 @atoi(i8* %5) #4, !dbg !40 + %conv = sitofp i32 %call4 to double, !dbg !40 + %mul = fmul double 0x40370ABE6A337A81, %conv, !dbg !41 + store double %mul, double* %s, align 8, !dbg !38 + call void @llvm.dbg.declare(metadata i32* %u, metadata !42, metadata !17), !dbg !44 + store i32 0, i32* %u, align 4, !dbg !44 + br label %for.cond, !dbg !45 + +for.cond: ; preds = %for.inc, %if.then.2 + %6 = load i32, i32* %u, align 4, !dbg !46 + %7 = load i32, i32* %limit, align 4, !dbg !48 + %cmp5 = icmp slt i32 %6, %7, !dbg !49 + br i1 %cmp5, label %for.body, label %for.end, !dbg !50, !prof !80 +; CHECK: edge for.cond -> for.body probability is 0x73333333 / 0x80000000 = 90.00% +; CHECK: edge for.cond -> for.end probability is 0x0ccccccd / 0x80000000 = 10.00% + +for.body: ; preds = %for.cond + call void @llvm.dbg.declare(metadata double* %x, metadata !51, metadata !17), !dbg !53 + %8 = load double, double* %s, align 8, !dbg !54 + store double %8, double* %x, align 8, !dbg !53 + %9 = load double, double* %x, align 8, !dbg !55 + %add = fadd double %9, 3.049000e+00, !dbg !56 + %10 = load i32, i32* %u, align 4, !dbg !57 + %conv6 = sitofp i32 %10 to double, !dbg !57 + %add7 = fadd double %add, %conv6, !dbg !58 + store double %add7, double* %s, align 8, !dbg !59 + %11 = load double, double* %s, align 8, !dbg !60 + %12 = load double, double* %x, align 8, !dbg !61 + %div = fdiv double 3.940000e+00, %12, !dbg !62 + %mul8 = fmul double %div, 3.200000e-01, !dbg !63 + %add9 = fadd double %11, %mul8, !dbg !64 + %13 = load double, double* %s, align 8, !dbg !65 + %sub = fsub double %13, %add9, !dbg !65 + store double %sub, double* %s, align 8, !dbg !65 + br label %for.inc, !dbg !66 + +for.inc: ; preds = %for.body + %14 = load i32, i32* %u, align 4, !dbg !67 + %inc = add nsw i32 %14, 1, !dbg !67 + store i32 %inc, i32* %u, align 4, !dbg !67 + br label %for.cond, !dbg !68 + +for.end: ; preds = %for.cond + %15 = load double, double* %s, align 8, !dbg !69 + store double %15, double* %result, align 8, !dbg !70 + br label %if.end.13, !dbg !71 + +if.else: ; preds = %if.end + %16 = load i8**, i8*** %argv.addr, align 8, !dbg !72 + %arrayidx10 = getelementptr inbounds i8*, i8** %16, i64 2, !dbg !72 + %17 = load i8*, i8** %arrayidx10, align 8, !dbg !72 + %call11 = call i32 @atoi(i8* %17) #4, !dbg !74 + %conv12 = sitofp i32 %call11 to double, !dbg !74 + store double %conv12, double* %result, align 8, !dbg !75 + br label %if.end.13 + +if.end.13: ; preds = %if.else, %for.end + %18 = load double, double* %result, align 8, !dbg !76 + %call14 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str, i32 0, i32 0), double %18), !dbg !77 + store i32 0, i32* %retval, align 4, !dbg !78 + br label %return, !dbg !78 + +return: ; preds = %if.end.13, %if.then + %19 = load i32, i32* %retval, align 4, !dbg !79 + ret i32 %19, !dbg !79 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: nounwind readonly +declare i32 @atoi(i8*) #2 + +declare i32 @printf(i8*, ...) #3 + +attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { nounwind readonly } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!13, !14} +!llvm.ident = !{!15} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 248211) (llvm/trunk 248217)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !3) +!1 = !DIFile(filename: "test.cc", directory: "/ssd/llvm_commit") +!2 = !{} +!3 = !{!4} +!4 = !DIBasicType(name: "double", size: 64, align: 64, encoding: DW_ATE_float) +!6 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 4, type: !7, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) +!7 = !DISubroutineType(types: !8) +!8 = !{!9, !9, !10} +!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64, align: 64) +!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64, align: 64) +!12 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char) +!13 = !{i32 2, !"Dwarf Version", i32 4} +!14 = !{i32 2, !"Debug Info Version", i32 3} +!15 = !{!"clang version 3.8.0 (trunk 248211) (llvm/trunk 248217)"} +!16 = !DILocalVariable(name: "argc", arg: 1, scope: !6, file: !1, line: 4, type: !9) +!17 = !DIExpression() +!18 = !DILocation(line: 4, column: 15, scope: !6) +!19 = !DILocalVariable(name: "argv", arg: 2, scope: !6, file: !1, line: 4, type: !10) +!20 = !DILocation(line: 4, column: 27, scope: !6) +!21 = !DILocation(line: 5, column: 8, scope: !22) +!22 = distinct !DILexicalBlock(scope: !6, file: !1, line: 5, column: 8) +!23 = !DILocation(line: 5, column: 13, scope: !22) +!24 = !DILocation(line: 5, column: 8, scope: !6) +!25 = !DILocation(line: 6, column: 6, scope: !22) +!26 = !DILocalVariable(name: "result", scope: !6, file: !1, line: 7, type: !4) +!27 = !DILocation(line: 7, column: 11, scope: !6) +!28 = !DILocalVariable(name: "limit", scope: !6, file: !1, line: 8, type: !9) +!29 = !DILocation(line: 8, column: 8, scope: !6) +!30 = !DILocation(line: 8, column: 21, scope: !6) +!31 = !DILocation(line: 8, column: 16, scope: !6) +!32 = !DILocation(line: 9, column: 8, scope: !33) +!33 = distinct !DILexicalBlock(scope: !6, file: !1, line: 9, column: 8) +!34 = !DILocation(line: 9, column: 14, scope: !33) +!35 = !DILocation(line: 9, column: 8, scope: !6) +!36 = !DILocalVariable(name: "s", scope: !37, file: !1, line: 10, type: !4) +!37 = distinct !DILexicalBlock(scope: !33, file: !1, line: 9, column: 21) +!38 = !DILocation(line: 10, column: 13, scope: !37) +!39 = !DILocation(line: 10, column: 34, scope: !37) +!40 = !DILocation(line: 10, column: 29, scope: !37) +!41 = !DILocation(line: 10, column: 27, scope: !37) +!42 = !DILocalVariable(name: "u", scope: !43, file: !1, line: 11, type: !9) +!43 = distinct !DILexicalBlock(scope: !37, file: !1, line: 11, column: 6) +!44 = !DILocation(line: 11, column: 15, scope: !43) +!45 = !DILocation(line: 11, column: 11, scope: !43) +!46 = !DILocation(line: 11, column: 22, scope: !47) +!47 = distinct !DILexicalBlock(scope: !43, file: !1, line: 11, column: 6) +!48 = !DILocation(line: 11, column: 26, scope: !47) +!49 = !DILocation(line: 11, column: 24, scope: !47) +!50 = !DILocation(line: 11, column: 6, scope: !43) +!51 = !DILocalVariable(name: "x", scope: !52, file: !1, line: 12, type: !4) +!52 = distinct !DILexicalBlock(scope: !47, file: !1, line: 11, column: 38) +!53 = !DILocation(line: 12, column: 15, scope: !52) +!54 = !DILocation(line: 12, column: 19, scope: !52) +!55 = !DILocation(line: 13, column: 12, scope: !52) +!56 = !DILocation(line: 13, column: 14, scope: !52) +!57 = !DILocation(line: 13, column: 32, scope: !52) +!58 = !DILocation(line: 13, column: 22, scope: !52) +!59 = !DILocation(line: 13, column: 10, scope: !52) +!60 = !DILocation(line: 14, column: 13, scope: !52) +!61 = !DILocation(line: 14, column: 24, scope: !52) +!62 = !DILocation(line: 14, column: 22, scope: !52) +!63 = !DILocation(line: 14, column: 26, scope: !52) +!64 = !DILocation(line: 14, column: 15, scope: !52) +!65 = !DILocation(line: 14, column: 10, scope: !52) +!66 = !DILocation(line: 15, column: 6, scope: !52) +!67 = !DILocation(line: 11, column: 34, scope: !47) +!68 = !DILocation(line: 11, column: 6, scope: !47) +!69 = !DILocation(line: 16, column: 15, scope: !37) +!70 = !DILocation(line: 16, column: 13, scope: !37) +!71 = !DILocation(line: 17, column: 4, scope: !37) +!72 = !DILocation(line: 18, column: 20, scope: !73) +!73 = distinct !DILexicalBlock(scope: !33, file: !1, line: 17, column: 11) +!74 = !DILocation(line: 18, column: 15, scope: !73) +!75 = !DILocation(line: 18, column: 13, scope: !73) +!76 = !DILocation(line: 20, column: 30, scope: !6) +!77 = !DILocation(line: 20, column: 4, scope: !6) +!78 = !DILocation(line: 21, column: 4, scope: !6) +!79 = !DILocation(line: 22, column: 2, scope: !6) +!80 = !{!"branch_weights", i32 90, i32 10} diff --git a/llvm/test/Transforms/SampleProfile/calls.ll b/llvm/test/Transforms/SampleProfile/calls.ll new file mode 100644 index 00000000000..4e4322868af --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/calls.ll @@ -0,0 +1,116 @@ +; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/calls.prof | opt -analyze -branch-prob | FileCheck %s +; RUN: opt < %s -passes="function(instcombine),sample-profile" -sample-profile-file=%S/Inputs/calls.prof | opt -analyze -branch-prob | FileCheck %s + +; Original C++ test case +; +; #include <stdio.h> +; +; int sum(int x, int y) { +; return x + y; +; } +; +; int main() { +; int s, i = 0; +; while (i++ < 20000 * 20000) +; if (i != 100) s = sum(i, s); else s = 30; +; printf("sum is %d\n", s); +; return 0; +; } +; +@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 + +; Function Attrs: nounwind uwtable +define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %0 = load i32, i32* %x.addr, align 4, !dbg !11 + %1 = load i32, i32* %y.addr, align 4, !dbg !11 + %add = add nsw i32 %0, %1, !dbg !11 + ret i32 %add, !dbg !11 +} + +; Function Attrs: uwtable +define i32 @main() !dbg !7 { +entry: + %retval = alloca i32, align 4 + %s = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %i, align 4, !dbg !12 + br label %while.cond, !dbg !13 + +while.cond: ; preds = %if.end, %entry + %0 = load i32, i32* %i, align 4, !dbg !14 + %inc = add nsw i32 %0, 1, !dbg !14 + store i32 %inc, i32* %i, align 4, !dbg !14 + %cmp = icmp slt i32 %0, 400000000, !dbg !14 + br i1 %cmp, label %while.body, label %while.end, !dbg !14 +; CHECK: edge while.cond -> while.body probability is 0x77f2798d / 0x80000000 = 93.71% [HOT edge] +; CHECK: edge while.cond -> while.end probability is 0x080d8673 / 0x80000000 = 6.29% + +while.body: ; preds = %while.cond + %1 = load i32, i32* %i, align 4, !dbg !16 + %cmp1 = icmp ne i32 %1, 100, !dbg !16 + br i1 %cmp1, label %if.then, label %if.else, !dbg !16 +; Without discriminator information, the profiler used to think that +; both branches out of while.body had the same weight. In reality, +; the edge while.body->if.then is taken most of the time. +; +; CHECK: edge while.body -> if.else probability is 0x0005b1e0 / 0x80000000 = 0.02% +; CHECK: edge while.body -> if.then probability is 0x7ffa4e20 / 0x80000000 = 99.98% [HOT edge] + + +if.then: ; preds = %while.body + %2 = load i32, i32* %i, align 4, !dbg !18 + %3 = load i32, i32* %s, align 4, !dbg !18 + %call = call i32 @_Z3sumii(i32 %2, i32 %3), !dbg !18 + store i32 %call, i32* %s, align 4, !dbg !18 + br label %if.end, !dbg !18 + +if.else: ; preds = %while.body + store i32 30, i32* %s, align 4, !dbg !20 + br label %if.end + +if.end: ; preds = %if.else, %if.then + br label %while.cond, !dbg !22 + +while.end: ; preds = %while.cond + %4 = load i32, i32* %s, align 4, !dbg !24 + %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24 + ret i32 0, !dbg !25 +} + +declare i32 @printf(i8*, ...) #2 + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8, !9} +!llvm.ident = !{!10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "calls.cc", directory: ".") +!2 = !{} +!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2) +!5 = !DIFile(filename: "calls.cc", directory: ".") +!6 = !DISubroutineType(types: !2) +!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2) +!8 = !{i32 2, !"Dwarf Version", i32 4} +!9 = !{i32 1, !"Debug Info Version", i32 3} +!10 = !{!"clang version 3.5 "} +!11 = !DILocation(line: 4, scope: !4) +!12 = !DILocation(line: 8, scope: !7) +!13 = !DILocation(line: 9, scope: !7) +!14 = !DILocation(line: 9, scope: !15) +!15 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !7) +!16 = !DILocation(line: 10, scope: !17) +!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7) +!18 = !DILocation(line: 10, scope: !19) +!19 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17) +!20 = !DILocation(line: 10, scope: !21) +!21 = !DILexicalBlockFile(discriminator: 4, file: !1, scope: !17) +!22 = !DILocation(line: 10, scope: !23) +!23 = !DILexicalBlockFile(discriminator: 6, file: !1, scope: !17) +!24 = !DILocation(line: 11, scope: !7) +!25 = !DILocation(line: 12, scope: !7) diff --git a/llvm/test/Transforms/SampleProfile/cold-indirect-call.ll b/llvm/test/Transforms/SampleProfile/cold-indirect-call.ll new file mode 100644 index 00000000000..b8a61e07ae7 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/cold-indirect-call.ll @@ -0,0 +1,31 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/cold-indirect-call.prof -S | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/cold-indirect-call.prof -S | FileCheck %s + +define i32 @foo(i32 ()* %func) !dbg !3 { +; CHECK: icmp {{.*}} @bar +; CHECK-NOT: icmp {{.*}} @baz + %call = call i32 %func(), !dbg !4 + ret i32 %call +} + +define i32 @bar() !dbg !5 { + ret i32 41, !dbg !6 +} + +define i32 @baz() !dbg !7 { + ret i32 42, !dbg !8 +} + + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1) +!1 = !DIFile(filename: "foo.cc", directory: "/") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 4, unit: !0) +!4 = !DILocation(line: 5, scope: !3) +!5 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 8, unit: !0) +!6 = !DILocation(line: 9, scope: !5) +!7 = distinct !DISubprogram(name: "baz", scope: !1, file: !1, line: 12, unit: !0) +!8 = !DILocation(line: 13, scope: !7) diff --git a/llvm/test/Transforms/SampleProfile/compact-binary-profile.ll b/llvm/test/Transforms/SampleProfile/compact-binary-profile.ll new file mode 100644 index 00000000000..3b0a2a47c31 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/compact-binary-profile.ll @@ -0,0 +1,121 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.compactbinary.afdo -S | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.compactbinary.afdo -S | FileCheck %s + +; Original C++ test case +; +; #include <stdio.h> +; +; int sum(int x, int y) { +; return x + y; +; } +; +; int main() { +; int s, i = 0; +; while (i++ < 20000 * 20000) +; if (i != 100) s = sum(i, s); else s = 30; +; printf("sum is %d\n", s); +; return 0; +; } +; +@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 + +; Check sample-profile phase using compactbinary format profile will annotate +; the IR with exactly the same result as using text format. +; CHECK: br i1 %cmp, label %while.body, label %while.end{{.*}} !prof ![[IDX1:[0-9]*]] +; CHECK: br i1 %cmp1, label %if.then, label %if.else{{.*}} !prof ![[IDX2:[0-9]*]] +; CHECK: call i32 (i8*, ...) @printf{{.*}} !prof ![[IDX3:[0-9]*]] +; CHECK: = !{!"TotalCount", i64 26781} +; CHECK: = !{!"MaxCount", i64 5553} +; CHECK: ![[IDX1]] = !{!"branch_weights", i32 5392, i32 163} +; CHECK: ![[IDX2]] = !{!"branch_weights", i32 5280, i32 113} +; CHECK: ![[IDX3]] = !{!"branch_weights", i32 1} + +; Function Attrs: nounwind uwtable +define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %0 = load i32, i32* %x.addr, align 4, !dbg !11 + %1 = load i32, i32* %y.addr, align 4, !dbg !11 + %add = add nsw i32 %0, %1, !dbg !11 + ret i32 %add, !dbg !11 +} + +; Function Attrs: uwtable +define i32 @main() !dbg !7 { +entry: + %retval = alloca i32, align 4 + %s = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %i, align 4, !dbg !12 + br label %while.cond, !dbg !13 + +while.cond: ; preds = %if.end, %entry + %0 = load i32, i32* %i, align 4, !dbg !14 + %inc = add nsw i32 %0, 1, !dbg !14 + store i32 %inc, i32* %i, align 4, !dbg !14 + %cmp = icmp slt i32 %0, 400000000, !dbg !14 + br i1 %cmp, label %while.body, label %while.end, !dbg !14 + +while.body: ; preds = %while.cond + %1 = load i32, i32* %i, align 4, !dbg !16 + %cmp1 = icmp ne i32 %1, 100, !dbg !16 + br i1 %cmp1, label %if.then, label %if.else, !dbg !16 + + +if.then: ; preds = %while.body + %2 = load i32, i32* %i, align 4, !dbg !18 + %3 = load i32, i32* %s, align 4, !dbg !18 + %call = call i32 @_Z3sumii(i32 %2, i32 %3), !dbg !18 + store i32 %call, i32* %s, align 4, !dbg !18 + br label %if.end, !dbg !18 + +if.else: ; preds = %while.body + store i32 30, i32* %s, align 4, !dbg !20 + br label %if.end + +if.end: ; preds = %if.else, %if.then + br label %while.cond, !dbg !22 + +while.end: ; preds = %while.cond + %4 = load i32, i32* %s, align 4, !dbg !24 + %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24 + ret i32 0, !dbg !25 +} + +declare i32 @printf(i8*, ...) #2 + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8, !9} +!llvm.ident = !{!10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "calls.cc", directory: ".") +!2 = !{} +!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2) +!5 = !DIFile(filename: "calls.cc", directory: ".") +!6 = !DISubroutineType(types: !2) +!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2) +!8 = !{i32 2, !"Dwarf Version", i32 4} +!9 = !{i32 1, !"Debug Info Version", i32 3} +!10 = !{!"clang version 3.5 "} +!11 = !DILocation(line: 4, scope: !4) +!12 = !DILocation(line: 8, scope: !7) +!13 = !DILocation(line: 9, scope: !7) +!14 = !DILocation(line: 9, scope: !15) +!15 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !7) +!16 = !DILocation(line: 10, scope: !17) +!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7) +!18 = !DILocation(line: 10, scope: !19) +!19 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17) +!20 = !DILocation(line: 10, scope: !21) +!21 = !DILexicalBlockFile(discriminator: 4, file: !1, scope: !17) +!22 = !DILocation(line: 10, scope: !23) +!23 = !DILexicalBlockFile(discriminator: 6, file: !1, scope: !17) +!24 = !DILocation(line: 11, scope: !7) +!25 = !DILocation(line: 12, scope: !7) diff --git a/llvm/test/Transforms/SampleProfile/cov-zero-samples.ll b/llvm/test/Transforms/SampleProfile/cov-zero-samples.ll new file mode 100644 index 00000000000..9eb312aa4c0 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/cov-zero-samples.ll @@ -0,0 +1,147 @@ +; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/cov-zero-samples.prof -sample-profile-check-record-coverage=100 -pass-remarks=sample-profile -pass-remarks-analysis=sample-profile -o /dev/null 2>&1 | FileCheck %s +; RUN: opt < %s -passes="function(instcombine),sample-profile" -sample-profile-file=%S/Inputs/cov-zero-samples.prof -sample-profile-check-record-coverage=100 -pass-remarks=sample-profile -pass-remarks-analysis=sample-profile -o /dev/null 2>&1 | FileCheck %s +; +; CHECK: remark: cov-zero-samples.cc:9:29: Applied 404065 samples from profile (offset: 2.1) +; CHECK: remark: cov-zero-samples.cc:10:9: Applied 443089 samples from profile (offset: 3) +; CHECK: remark: cov-zero-samples.cc:10:36: Applied 0 samples from profile (offset: 3.1) +; CHECK: remark: cov-zero-samples.cc:11:12: Applied 404066 samples from profile (offset: 4) +; CHECK: remark: cov-zero-samples.cc:13:25: Applied 0 samples from profile (offset: 6) +; CHECK: remark: cov-zero-samples.cc:14:3: Applied 0 samples from profile (offset: 7) +; CHECK: remark: cov-zero-samples.cc:10:9: most popular destination for conditional branches at cov-zero-samples.cc:9:3 +; CHECK: remark: cov-zero-samples.cc:11:12: most popular destination for conditional branches at cov-zero-samples.cc:10:9 +; +; Coverage for this profile should be 100% +; CHECK-NOT: warning: cov-zero-samples.cc:1: + +source_filename = "test/Transforms/SampleProfile/cov-zero-samples.ll" + +@N = global i64 8000000000, align 8, !dbg !0 +@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 + +define i32 @_Z12never_calledi(i32 %i) !dbg !11 { +entry: + ret i32 0, !dbg !15 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #0 + +define i32 @main() !dbg !17 { +entry: + %retval = alloca i32, align 4 + %sum = alloca i32, align 4 + %i = alloca i64, align 8 + store i32 0, i32* %retval, align 4 + call void @llvm.dbg.declare(metadata i32* %sum, metadata !20, metadata !21), !dbg !22 + store i32 0, i32* %sum, align 4, !dbg !22 + call void @llvm.dbg.declare(metadata i64* %i, metadata !23, metadata !21), !dbg !25 + store i64 0, i64* %i, align 8, !dbg !25 + br label %for.cond, !dbg !26 + +for.cond: ; preds = %for.inc, %entry + + %0 = load i64, i64* %i, align 8, !dbg !27 + %1 = load volatile i64, i64* @N, align 8, !dbg !30 + %cmp = icmp slt i64 %0, %1, !dbg !31 + br i1 %cmp, label %for.body, label %for.end, !dbg !32 + +for.body: ; preds = %for.cond + %2 = load i64, i64* %i, align 8, !dbg !33 + %3 = load volatile i64, i64* @N, align 8, !dbg !36 + %cmp1 = icmp sgt i64 %2, %3, !dbg !37 + br i1 %cmp1, label %if.then, label %if.end, !dbg !38 + +if.then: ; preds = %for.body + %4 = load i64, i64* %i, align 8, !dbg !39 + %conv = trunc i64 %4 to i32, !dbg !39 + %call = call i32 @_Z12never_calledi(i32 %conv), !dbg !41 + %5 = load i32, i32* %sum, align 4, !dbg !42 + %add = add nsw i32 %5, %call, !dbg !42 + store i32 %add, i32* %sum, align 4, !dbg !42 + br label %if.end, !dbg !43 + +if.end: ; preds = %if.then, %for.body + %6 = load i64, i64* %i, align 8, !dbg !44 + %div = sdiv i64 %6, 239, !dbg !45 + %7 = load i32, i32* %sum, align 4, !dbg !46 + %conv2 = sext i32 %7 to i64, !dbg !46 + %mul = mul nsw i64 %conv2, %div, !dbg !46 + %conv3 = trunc i64 %mul to i32, !dbg !46 + store i32 %conv3, i32* %sum, align 4, !dbg !46 + br label %for.inc, !dbg !47 + +for.inc: ; preds = %if.end + %8 = load i64, i64* %i, align 8, !dbg !48 + %inc = add nsw i64 %8, 1, !dbg !48 + store i64 %inc, i64* %i, align 8, !dbg !48 + br label %for.cond, !dbg !50 + +for.end: ; preds = %for.cond + %9 = load i32, i32* %sum, align 4, !dbg !51 + %call4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %9), !dbg !52 + ret i32 0, !dbg !53 +} + +declare i32 @printf(i8*, ...) + +attributes #0 = { nounwind readnone } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!8, !9} +!llvm.ident = !{!10} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = !DIGlobalVariable(name: "N", scope: !2, file: !3, line: 3, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 3.8.0 (trunk 253667) (llvm/trunk 253670)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !4, globals: !5) +!3 = !DIFile(filename: "cov-zero-samples.cc", directory: ".") +!4 = !{} +!5 = !{!0} +!6 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !7) +!7 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed) +!8 = !{i32 2, !"Dwarf Version", i32 4} +!9 = !{i32 2, !"Debug Info Version", i32 3} +!10 = !{!"clang version 3.8.0 (trunk 253667) (llvm/trunk 253670)"} +!11 = distinct !DISubprogram(name: "never_called", linkageName: "_Z12never_calledi", scope: !3, file: !3, line: 5, type: !12, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, unit: !2, retainedNodes: !4) +!12 = !DISubroutineType(types: !13) +!13 = !{!14, !14} +!14 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!15 = !DILocation(line: 5, column: 27, scope: !16) +!16 = !DILexicalBlockFile(scope: !11, file: !3, discriminator: 6) +!17 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 7, type: !18, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, unit: !2, retainedNodes: !4) +!18 = !DISubroutineType(types: !19) +!19 = !{!14} +!20 = !DILocalVariable(name: "sum", scope: !17, file: !3, line: 8, type: !14) +!21 = !DIExpression() +!22 = !DILocation(line: 8, column: 7, scope: !17) +!23 = !DILocalVariable(name: "i", scope: !24, file: !3, line: 9, type: !7) +!24 = distinct !DILexicalBlock(scope: !17, file: !3, line: 9, column: 3) +!25 = !DILocation(line: 9, column: 18, scope: !24) +!26 = !DILocation(line: 9, column: 8, scope: !24) +!27 = !DILocation(line: 9, column: 25, scope: !28) +!28 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2) +!29 = distinct !DILexicalBlock(scope: !24, file: !3, line: 9, column: 3) +!30 = !DILocation(line: 9, column: 29, scope: !28) +!31 = !DILocation(line: 9, column: 27, scope: !28) +!32 = !DILocation(line: 9, column: 3, scope: !28) +!33 = !DILocation(line: 10, column: 9, scope: !34) +!34 = distinct !DILexicalBlock(scope: !35, file: !3, line: 10, column: 9) +!35 = distinct !DILexicalBlock(scope: !29, file: !3, line: 9, column: 37) +!36 = !DILocation(line: 10, column: 13, scope: !34) +!37 = !DILocation(line: 10, column: 11, scope: !34) +!38 = !DILocation(line: 10, column: 9, scope: !35) +!39 = !DILocation(line: 10, column: 36, scope: !40) +!40 = !DILexicalBlockFile(scope: !34, file: !3, discriminator: 2) +!41 = !DILocation(line: 10, column: 23, scope: !40) +!42 = !DILocation(line: 10, column: 20, scope: !40) +!43 = !DILocation(line: 10, column: 16, scope: !40) +!44 = !DILocation(line: 11, column: 12, scope: !35) +!45 = !DILocation(line: 11, column: 14, scope: !35) +!46 = !DILocation(line: 11, column: 9, scope: !35) +!47 = !DILocation(line: 12, column: 3, scope: !35) +!48 = !DILocation(line: 9, column: 33, scope: !49) +!49 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 4) +!50 = !DILocation(line: 9, column: 3, scope: !49) +!51 = !DILocation(line: 13, column: 25, scope: !17) +!52 = !DILocation(line: 13, column: 3, scope: !17) +!53 = !DILocation(line: 14, column: 3, scope: !17) + diff --git a/llvm/test/Transforms/SampleProfile/coverage-warning.ll b/llvm/test/Transforms/SampleProfile/coverage-warning.ll new file mode 100644 index 00000000000..3f683cec529 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/coverage-warning.ll @@ -0,0 +1,46 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/coverage-warning.prof -sample-profile-check-record-coverage=90 -sample-profile-check-sample-coverage=100 -o /dev/null 2>&1 | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/coverage-warning.prof -sample-profile-check-record-coverage=90 -sample-profile-check-sample-coverage=100 -o /dev/null 2>&1 | FileCheck %s +define i32 @foo(i32 %i) !dbg !4 { +; The profile has samples for line locations that are no longer present. +; Coverage does not reach 90%, so we should get this warning: +; +; CHECK: warning: coverage-warning.c:1: 2 of 3 available profile records (66%) were applied +; CHECK: warning: coverage-warning.c:1: 29000 of 30700 available profile samples (94%) were applied +entry: + %retval = alloca i32, align 4 + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4, !dbg !9 + %cmp = icmp sgt i32 %0, 1000, !dbg !10 + br i1 %cmp, label %if.then, label %if.end, !dbg !9 + +if.then: ; preds = %entry + store i32 30, i32* %retval, align 4, !dbg !11 + br label %return, !dbg !11 + +if.end: ; preds = %entry + store i32 3, i32* %retval, align 4, !dbg !12 + br label %return, !dbg !12 + +return: ; preds = %if.end, %if.then + %1 = load i32, i32* %retval, align 4, !dbg !13 + ret i32 %1, !dbg !13 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!6, !7} +!llvm.ident = !{!8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251524) (llvm/trunk 251531)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2) +!1 = !DIFile(filename: "coverage-warning.c", directory: ".") +!2 = !{} +!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) +!5 = !DISubroutineType(types: !2) +!6 = !{i32 2, !"Dwarf Version", i32 4} +!7 = !{i32 2, !"Debug Info Version", i32 3} +!8 = !{!"clang version 3.8.0 (trunk 251524) (llvm/trunk 251531)"} +!9 = !DILocation(line: 2, column: 7, scope: !4) +!10 = !DILocation(line: 2, column: 9, scope: !4) +!11 = !DILocation(line: 3, column: 5, scope: !4) +!12 = !DILocation(line: 4, column: 3, scope: !4) +!13 = !DILocation(line: 5, column: 1, scope: !4) diff --git a/llvm/test/Transforms/SampleProfile/discriminator.ll b/llvm/test/Transforms/SampleProfile/discriminator.ll new file mode 100644 index 00000000000..7b3b270d9f1 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/discriminator.ll @@ -0,0 +1,90 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/discriminator.prof | opt -analyze -branch-prob | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/discriminator.prof | opt -analyze -branch-prob | FileCheck %s + +; Original code +; +; 1 int foo(int i) { +; 2 int x = 0; +; 3 while (i < 100) { +; 4 if (i < 5) x--; +; 5 i++; +; 6 } +; 7 return x; +; 8 } +; +; In this test, if the loop is executed 100 times, the decrement operation +; at line 4 should only execute 5 times. This is reflected in the profile +; data for line offset 3. In Inputs/discriminator.prof, we have: +; +; 3: 100 +; 3.1: 5 +; +; This means that the predicate 'i < 5' (line 3) is executed 100 times, +; but the then branch (line 3.1) is only executed 5 times. + +define i32 @foo(i32 %i) #0 !dbg !4 { +; CHECK: Printing analysis 'Branch Probability Analysis' for function 'foo': +entry: + %i.addr = alloca i32, align 4 + %x = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + store i32 0, i32* %x, align 4, !dbg !10 + br label %while.cond, !dbg !11 + +while.cond: ; preds = %if.end, %entry + %0 = load i32, i32* %i.addr, align 4, !dbg !12 + %cmp = icmp slt i32 %0, 100, !dbg !12 + br i1 %cmp, label %while.body, label %while.end, !dbg !12 +; CHECK: edge while.cond -> while.body probability is 0x7d83ba68 / 0x80000000 = 98.06% [HOT edge] +; CHECK: edge while.cond -> while.end probability is 0x027c4598 / 0x80000000 = 1.94% + +while.body: ; preds = %while.cond + %1 = load i32, i32* %i.addr, align 4, !dbg !14 + %cmp1 = icmp slt i32 %1, 50, !dbg !14 + br i1 %cmp1, label %if.then, label %if.end, !dbg !14 +; CHECK: edge while.body -> if.then probability is 0x07878788 / 0x80000000 = 5.88% +; CHECK: edge while.body -> if.end probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge] + +if.then: ; preds = %while.body + %2 = load i32, i32* %x, align 4, !dbg !17 + %dec = add nsw i32 %2, -1, !dbg !17 + store i32 %dec, i32* %x, align 4, !dbg !17 + br label %if.end, !dbg !17 + +if.end: ; preds = %if.then, %while.body + %3 = load i32, i32* %i.addr, align 4, !dbg !19 + %inc = add nsw i32 %3, 1, !dbg !19 + store i32 %inc, i32* %i.addr, align 4, !dbg !19 + br label %while.cond, !dbg !20 + +while.end: ; preds = %while.cond + %4 = load i32, i32* %x, align 4, !dbg !21 + ret i32 %4, !dbg !21 +} + + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "discriminator.c", directory: ".") +!2 = !{} +!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2) +!5 = !DIFile(filename: "discriminator.c", directory: ".") +!6 = !DISubroutineType(types: !2) +!7 = !{i32 2, !"Dwarf Version", i32 4} +!8 = !{i32 1, !"Debug Info Version", i32 3} +!9 = !{!"clang version 3.5 "} +!10 = !DILocation(line: 2, scope: !4) +!11 = !DILocation(line: 3, scope: !4) +!12 = !DILocation(line: 3, scope: !13) +!13 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !4) +!14 = !DILocation(line: 4, scope: !15) +!15 = distinct !DILexicalBlock(line: 4, column: 0, file: !1, scope: !16) +!16 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !4) +!17 = !DILocation(line: 4, scope: !18) +!18 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !15) +!19 = !DILocation(line: 5, scope: !16) +!20 = !DILocation(line: 6, scope: !16) +!21 = !DILocation(line: 7, scope: !4) diff --git a/llvm/test/Transforms/SampleProfile/early-inline.ll b/llvm/test/Transforms/SampleProfile/early-inline.ll new file mode 100644 index 00000000000..8b857a449f0 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/early-inline.ll @@ -0,0 +1,76 @@ +; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/einline.prof -S | FileCheck %s + +; Checks if both call and invoke can be inlined early if their inlined +; instances are hot in profile. + +target triple = "x86_64-unknown-linux-gnu" + +@_ZTIi = external constant i8* + +; Function Attrs: uwtable +define void @_Z3foov() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg !6 { + %1 = alloca i8* + %2 = alloca i32 + %3 = alloca i32, align 4 +; CHECK: call void @no_inline + call void @no_inline(), !dbg !16 +; CHECK-NOT: call + call void @_ZL3barv(), !dbg !9 +; CHECK-NOT: invoke + invoke void @_ZL3barv() + to label %4 unwind label %5, !dbg !10 + +; <label>:4: + ret void + +; <label>:5: + %6 = landingpad { i8*, i32 } + catch i8* bitcast (i8** @_ZTIi to i8*) + ret void +} + +; Function Attrs: nounwind uwtable +define internal void @_ZL3barv() #0 !dbg !12 { + ret void +} + +; CHECK-LABEL: @recursive +define void @recursive() #0 !dbg !13 { +; Recursive calls should not be early-inlined. +; CHECK-NOT: call void @recursive +; CHECK: call void @recursive +; CHECK: call void @recursive +; CHECK-NOT: call void @recursive +; CHECK: ret + call void @recursive(), !dbg !14 + call void @recursive(), !dbg !15 + ret void +} + +; The callee has mismatch attributes to the caller, it should not be inlined +define void @no_inline() #1 !dbg !17 { + ret void +} + +declare i32 @__gxx_personality_v0(...) + +attributes #0 = {"target-features"="+sse4.1"} +attributes #1 = {"target-features"="+sse4.2"} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1) +!1 = !DIFile(filename: "a", directory: "b/") +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!6 = distinct !DISubprogram(linkageName: "_Z3foov", scope: !1, file: !1, line: 5, scopeLine: 5, unit: !0) +!9 = !DILocation(line: 6, column: 3, scope: !6) +!10 = !DILocation(line: 8, column: 5, scope: !11) +!11 = distinct !DILexicalBlock(scope: !6, file: !1, line: 7, column: 7) +!12 = distinct !DISubprogram(linkageName: "_ZL3barv", scope: !1, file: !1, line: 20, scopeLine: 20, unit: !0) +!13 = distinct !DISubprogram(linkageName: "recursive", scope: !1, file: !1, line: 20, scopeLine: 20, unit: !0) +!14 = !DILocation(line: 21, column: 3, scope: !13) +!15 = !DILocation(line: 22, column: 3, scope: !13) +!16 = !DILocation(line: 7, column: 3, scope: !6) +!17 = distinct !DISubprogram(linkageName: "no_inline", scope: !1, file: !1, line: 20, scopeLine: 20, unit: !0) diff --git a/llvm/test/Transforms/SampleProfile/entry_counts.ll b/llvm/test/Transforms/SampleProfile/entry_counts.ll new file mode 100644 index 00000000000..c7fc50313c0 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/entry_counts.ll @@ -0,0 +1,31 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/entry_counts.prof -S | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/entry_counts.prof -S | FileCheck %s + +; According to the profile, function empty() was called 13,293 times. +; CHECK: {{.*}} = !{!"function_entry_count", i64 13294} + +define void @empty() !dbg !4 { +entry: + ret void, !dbg !9 +} + +; This function does not have profile, check if function_entry_count is -1 +; CHECK: {{.*}} = !{!"function_entry_count", i64 -1} +define void @no_profile() { +entry: + ret void +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!6, !7} +!llvm.ident = !{!8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 237249) (llvm/trunk 237261)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "entry_counts.c", directory: ".") +!2 = !{} +!4 = distinct !DISubprogram(name: "empty", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !0, retainedNodes: !2) +!5 = !DISubroutineType(types: !2) +!6 = !{i32 2, !"Dwarf Version", i32 4} +!7 = !{i32 2, !"Debug Info Version", i32 3} +!8 = !{!"clang version 3.7.0 (trunk 237249) (llvm/trunk 237261)"} +!9 = !DILocation(line: 1, column: 15, scope: !4) diff --git a/llvm/test/Transforms/SampleProfile/entry_counts_cold.ll b/llvm/test/Transforms/SampleProfile/entry_counts_cold.ll new file mode 100644 index 00000000000..da60ebec6d5 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/entry_counts_cold.ll @@ -0,0 +1,170 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/entry_counts_cold.prof -S | FileCheck %s +; ModuleID = 'temp.bc' +source_filename = "temp.c" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +; Function Attrs: nounwind ssp uwtable +; CHECK: define i32 @top({{.*}} !prof [[TOP:![0-9]+]] +define i32 @top(i32* %p) #0 !dbg !8 { +entry: + %p.addr = alloca i32*, align 8 + store i32* %p, i32** %p.addr, align 8, !tbaa !15 + call void @llvm.dbg.declare(metadata i32** %p.addr, metadata !14, metadata !DIExpression()), !dbg !19 + %0 = load i32*, i32** %p.addr, align 8, !dbg !20, !tbaa !15 + %call = call i32 @foo(i32* %0), !dbg !21 +; foo is inlined +; CHECK-NOT: call i32 @foo +; CHECK: call i32 @bar + %1 = load i32*, i32** %p.addr, align 8, !dbg !22, !tbaa !15 + %2 = load i32, i32* %1, align 4, !dbg !24, !tbaa !25 + %tobool = icmp ne i32 %2, 0, !dbg !24 + br i1 %tobool, label %if.then, label %if.end, !dbg !27 + +if.then: ; preds = %entry + %3 = load i32*, i32** %p.addr, align 8, !dbg !28, !tbaa !15 +; bar is not inlined +; CHECK: call i32 @bar + %call1 = call i32 @bar(i32* %3), !dbg !29 + br label %if.end, !dbg !29 + +if.end: ; preds = %if.then, %entry + ret i32 0, !dbg !30 +} + +; Function Attrs: nounwind readnone speculatable +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: nounwind ssp uwtable +; CHECK: define i32 @foo({{.*}} !prof [[FOO:![0-9]+]] +define i32 @foo(i32* %p) #0 !dbg !31 { +entry: + %p.addr = alloca i32*, align 8 + %a = alloca i32, align 4 + store i32* %p, i32** %p.addr, align 8, !tbaa !15 + call void @llvm.dbg.declare(metadata i32** %p.addr, metadata !33, metadata !DIExpression()), !dbg !35 + %0 = bitcast i32* %a to i8*, !dbg !36 + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #4, !dbg !36 + call void @llvm.dbg.declare(metadata i32* %a, metadata !34, metadata !DIExpression()), !dbg !37 + %1 = load i32*, i32** %p.addr, align 8, !dbg !38, !tbaa !15 + %arrayidx = getelementptr inbounds i32, i32* %1, i64 3, !dbg !38 + %2 = load i32, i32* %arrayidx, align 4, !dbg !38, !tbaa !25 + %3 = load i32*, i32** %p.addr, align 8, !dbg !39, !tbaa !15 + %arrayidx1 = getelementptr inbounds i32, i32* %3, i64 2, !dbg !39 + %4 = load i32, i32* %arrayidx1, align 4, !dbg !40, !tbaa !25 + %add = add nsw i32 %4, %2, !dbg !40 + store i32 %add, i32* %arrayidx1, align 4, !dbg !40, !tbaa !25 + %5 = load i32*, i32** %p.addr, align 8, !dbg !41, !tbaa !15 + %call = call i32 @bar(i32* %5), !dbg !42 + store i32 %call, i32* %a, align 4, !dbg !43, !tbaa !25 + %6 = load i32, i32* %a, align 4, !dbg !44, !tbaa !25 + %add2 = add nsw i32 %6, 1, !dbg !45 + %7 = bitcast i32* %a to i8*, !dbg !46 + call void @llvm.lifetime.end.p0i8(i64 4, i8* %7) #4, !dbg !46 + ret i32 %add2, !dbg !47 +} + +; Function Attrs: nounwind ssp uwtable +; CHECK: define i32 @bar({{.*}} !prof [[BAR:![0-9]+]] +define i32 @bar(i32* %p) #0 !dbg !48 { +entry: + %p.addr = alloca i32*, align 8 + store i32* %p, i32** %p.addr, align 8, !tbaa !15 + call void @llvm.dbg.declare(metadata i32** %p.addr, metadata !50, metadata !DIExpression()), !dbg !51 + ; CHECK: call void (...) @baz{{.*}} !prof [[BAZ:![0-9]+]] + call void (...) @baz(), !dbg !52 + %0 = load i32*, i32** %p.addr, align 8, !dbg !53, !tbaa !15 + %arrayidx = getelementptr inbounds i32, i32* %0, i64 2, !dbg !53 + %1 = load i32, i32* %arrayidx, align 4, !dbg !53, !tbaa !25 + %2 = load i32*, i32** %p.addr, align 8, !dbg !54, !tbaa !15 + %arrayidx1 = getelementptr inbounds i32, i32* %2, i64 1, !dbg !54 + %3 = load i32, i32* %arrayidx1, align 4, !dbg !55, !tbaa !25 + %add = add nsw i32 %3, %1, !dbg !55 + store i32 %add, i32* %arrayidx1, align 4, !dbg !55, !tbaa !25 + %4 = load i32*, i32** %p.addr, align 8, !dbg !56, !tbaa !15 + %arrayidx2 = getelementptr inbounds i32, i32* %4, i64 3, !dbg !56 + %5 = load i32, i32* %arrayidx2, align 4, !dbg !56, !tbaa !25 + ret i32 %5, !dbg !57 +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #2 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #2 + +declare void @baz(...) #3 + +attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { argmemonly nounwind } +attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6} +!llvm.ident = !{!7} + +; CHECK: [[TOP]] = !{!"function_entry_count", i64 101} +; CHECK: [[FOO]] = !{!"function_entry_count", i64 151} +; CHECK: [[BAR]] = !{!"function_entry_count", i64 303} +; CHECK: [[BAZ]] = !{!"branch_weights", i64 303} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: GNU) +!1 = !DIFile(filename: "temp.c", directory: "llvm/test/Transforms/SampleProfile") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{i32 7, !"PIC Level", i32 2} +!7 = !{!"clang version 8.0.0"} +!8 = distinct !DISubprogram(name: "top", scope: !1, file: !1, line: 5, type: !9, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13) +!9 = !DISubroutineType(types: !10) +!10 = !{!11, !12} +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) +!13 = !{!14} +!14 = !DILocalVariable(name: "p", arg: 1, scope: !8, file: !1, line: 5, type: !12) +!15 = !{!16, !16, i64 0} +!16 = !{!"any pointer", !17, i64 0} +!17 = !{!"omnipotent char", !18, i64 0} +!18 = !{!"Simple C/C++ TBAA"} +!19 = !DILocation(line: 5, column: 14, scope: !8) +!20 = !DILocation(line: 6, column: 7, scope: !8) +!21 = !DILocation(line: 6, column: 3, scope: !8) +!22 = !DILocation(line: 7, column: 8, scope: !23) +!23 = distinct !DILexicalBlock(scope: !8, file: !1, line: 7, column: 7) +!24 = !DILocation(line: 7, column: 7, scope: !23) +!25 = !{!26, !26, i64 0} +!26 = !{!"int", !17, i64 0} +!27 = !DILocation(line: 7, column: 7, scope: !8) +!28 = !DILocation(line: 8, column: 9, scope: !23) +!29 = !DILocation(line: 8, column: 5, scope: !23) +!30 = !DILocation(line: 9, column: 3, scope: !8) +!31 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 12, type: !9, scopeLine: 12, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !32) +!32 = !{!33, !34} +!33 = !DILocalVariable(name: "p", arg: 1, scope: !31, file: !1, line: 12, type: !12) +!34 = !DILocalVariable(name: "a", scope: !31, file: !1, line: 13, type: !11) +!35 = !DILocation(line: 12, column: 14, scope: !31) +!36 = !DILocation(line: 13, column: 3, scope: !31) +!37 = !DILocation(line: 13, column: 7, scope: !31) +!38 = !DILocation(line: 14, column: 11, scope: !31) +!39 = !DILocation(line: 14, column: 3, scope: !31) +!40 = !DILocation(line: 14, column: 8, scope: !31) +!41 = !DILocation(line: 15, column: 11, scope: !31) +!42 = !DILocation(line: 15, column: 7, scope: !31) +!43 = !DILocation(line: 15, column: 5, scope: !31) +!44 = !DILocation(line: 16, column: 10, scope: !31) +!45 = !DILocation(line: 16, column: 11, scope: !31) +!46 = !DILocation(line: 17, column: 1, scope: !31) +!47 = !DILocation(line: 16, column: 3, scope: !31) +!48 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 19, type: !9, scopeLine: 19, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !49) +!49 = !{!50} +!50 = !DILocalVariable(name: "p", arg: 1, scope: !48, file: !1, line: 19, type: !12) +!51 = !DILocation(line: 19, column: 15, scope: !48) +!52 = !DILocation(line: 20, column: 3, scope: !48) +!53 = !DILocation(line: 21, column: 11, scope: !48) +!54 = !DILocation(line: 21, column: 3, scope: !48) +!55 = !DILocation(line: 21, column: 8, scope: !48) +!56 = !DILocation(line: 22, column: 10, scope: !48) +!57 = !DILocation(line: 22, column: 3, scope: !48) diff --git a/llvm/test/Transforms/SampleProfile/flattened.ll b/llvm/test/Transforms/SampleProfile/flattened.ll new file mode 100644 index 00000000000..7a1e53b966c --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/flattened.ll @@ -0,0 +1,39 @@ +; Check flattened profile will not be read in thinlto postlink. +; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -perform-thinlto=true -S | FileCheck %s +; RUN: opt < %s -passes='thinlto<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -flattened-profile-used -S | FileCheck %s +; +; Check flattened profile will be read in thinlto prelink. +; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -prepare-for-thinlto=true -S | FileCheck %s --check-prefix=PRELINK +; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -flattened-profile-used -S | FileCheck %s --check-prefix=PRELINK +; +; Check flattened profile will be read in non-thinlto mode. +; RUN: opt < %s -O2 -flattened-profile-used -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -enable-chr=false -S | FileCheck %s --check-prefix=NOTHINLTO +; RUN: opt < %s -passes='default<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/flattened.prof -flattened-profile-used -S | FileCheck %s --check-prefix=NOTHINLTO +; +; CHECK-NOT: !{!"ProfileFormat", !"SampleProfile"} +; PRELINK: !{!"ProfileFormat", !"SampleProfile"} +; NOTHINLTO: !{!"ProfileFormat", !"SampleProfile"} + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: norecurse nounwind readnone uwtable +define dso_local i32 @foo() local_unnamed_addr !dbg !7 { +entry: + ret i32 -1, !dbg !9 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0 (trunk 345241)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "a.c", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 8.0.0 (trunk 345241)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 2, column: 3, scope: !7) diff --git a/llvm/test/Transforms/SampleProfile/fnptr.ll b/llvm/test/Transforms/SampleProfile/fnptr.ll new file mode 100644 index 00000000000..28319d8dd43 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/fnptr.ll @@ -0,0 +1,157 @@ +; The two profiles used in this test are the same but encoded in different +; formats. This checks that we produce the same profile annotations regardless +; of the profile format. +; +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.prof | opt -analyze -branch-prob | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.binprof | opt -analyze -branch-prob | FileCheck %s + +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/fnptr.prof | opt -analyze -branch-prob | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/fnptr.binprof | opt -analyze -branch-prob | FileCheck %s + +; CHECK: edge for.body3 -> if.then probability is 0x1a56a56a / 0x80000000 = 20.58% +; CHECK: edge for.body3 -> if.else probability is 0x65a95a96 / 0x80000000 = 79.42% +; CHECK: edge for.inc -> for.inc12 probability is 0x000fbd1c / 0x80000000 = 0.05% +; CHECK: edge for.inc -> for.body3 probability is 0x7ff042e4 / 0x80000000 = 99.95% +; CHECK: edge for.inc12 -> for.end14 probability is 0x04000000 / 0x80000000 = 3.12% +; CHECK: edge for.inc12 -> for.cond1.preheader probability is 0x7c000000 / 0x80000000 = 96.88% + +; Original C++ test case. +; +; #include <stdlib.h> +; #include <math.h> +; #include <stdio.h> +; +; #define N 10000 +; #define M 6000 +; +; double foo(int x) { +; return x * sin((double)x); +; } +; +; double bar(int x) { +; return x - cos((double)x); +; } +; +; int main() { +; double (*fptr)(int); +; double S = 0; +; for (int i = 0; i < N; i++) +; for (int j = 0; j < M; j++) { +; fptr = (rand() % 100 < 30) ? foo : bar; +; if (rand() % 100 < 10) +; S += (*fptr)(i + j * 300); +; else +; S += (*fptr)(i - j / 840); +; } +; printf("S = %lf\n", S); +; return 0; +; } + +@.str = private unnamed_addr constant [9 x i8] c"S = %lf\0A\00", align 1 + +define double @_Z3fooi(i32 %x) #0 !dbg !3 { +entry: + %conv = sitofp i32 %x to double, !dbg !2 + %call = tail call double @sin(double %conv) #3, !dbg !8 + %mul = fmul double %conv, %call, !dbg !8 + ret double %mul, !dbg !8 +} + +declare double @sin(double) #1 + +define double @_Z3bari(i32 %x) #0 !dbg !10 { +entry: + %conv = sitofp i32 %x to double, !dbg !9 + %call = tail call double @cos(double %conv) #3, !dbg !11 + %sub = fsub double %conv, %call, !dbg !11 + ret double %sub, !dbg !11 +} + +declare double @cos(double) #1 + +define i32 @main() #2 !dbg !13 { +entry: + br label %for.cond1.preheader, !dbg !12 + +for.cond1.preheader: ; preds = %for.inc12, %entry + %i.025 = phi i32 [ 0, %entry ], [ %inc13, %for.inc12 ] + %S.024 = phi double [ 0.000000e+00, %entry ], [ %S.2.lcssa, %for.inc12 ] + br label %for.body3, !dbg !14 + +for.body3: ; preds = %for.inc, %for.cond1.preheader + %j.023 = phi i32 [ 0, %for.cond1.preheader ], [ %inc, %for.inc ] + %S.122 = phi double [ %S.024, %for.cond1.preheader ], [ %S.2, %for.inc ] + %call = tail call i32 @rand() #3, !dbg !15 + %rem = srem i32 %call, 100, !dbg !15 + %cmp4 = icmp slt i32 %rem, 30, !dbg !15 + %_Z3fooi._Z3bari = select i1 %cmp4, double (i32)* @_Z3fooi, double (i32)* @_Z3bari, !dbg !15 + %call5 = tail call i32 @rand() #3, !dbg !16 + %rem6 = srem i32 %call5, 100, !dbg !16 + %cmp7 = icmp slt i32 %rem6, 10, !dbg !16 + br i1 %cmp7, label %if.then, label %if.else, !dbg !16 + +if.then: ; preds = %for.body3 + %mul = mul nsw i32 %j.023, 300, !dbg !18 + %add = add nsw i32 %mul, %i.025, !dbg !18 + %call8 = tail call double %_Z3fooi._Z3bari(i32 %add), !dbg !18 + br label %for.inc, !dbg !18 + +if.else: ; preds = %for.body3 + %div = sdiv i32 %j.023, 840, !dbg !19 + %sub = sub nsw i32 %i.025, %div, !dbg !19 + %call10 = tail call double %_Z3fooi._Z3bari(i32 %sub), !dbg !19 + br label %for.inc + +for.inc: ; preds = %if.then, %if.else + %call8.pn = phi double [ %call8, %if.then ], [ %call10, %if.else ] + %S.2 = fadd double %S.122, %call8.pn, !dbg !18 + %inc = add nsw i32 %j.023, 1, !dbg !20 + %exitcond = icmp eq i32 %j.023, 5999, !dbg !14 + br i1 %exitcond, label %for.inc12, label %for.body3, !dbg !14 + +for.inc12: ; preds = %for.inc + %S.2.lcssa = phi double [ %S.2, %for.inc ] + %inc13 = add nsw i32 %i.025, 1, !dbg !22 + %exitcond26 = icmp eq i32 %i.025, 9999, !dbg !12 + br i1 %exitcond26, label %for.end14, label %for.cond1.preheader, !dbg !12 + +for.end14: ; preds = %for.inc12 + %S.2.lcssa.lcssa = phi double [ %S.2.lcssa, %for.inc12 ] + %call15 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i64 0, i64 0), double %S.2.lcssa.lcssa), !dbg !24 + ret i32 0, !dbg !25 +} + +; Function Attrs: nounwind +declare i32 @rand() #1 + +; Function Attrs: nounwind +declare i32 @printf(i8* nocapture readonly, ...) #1 + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} +!llvm.dbg.cu = !{!26} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"clang version 3.6.0 "} +!2 = !DILocation(line: 9, column: 3, scope: !3) +!3 = distinct !DISubprogram(name: "foo", line: 8, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, unit: !26, scopeLine: 8, file: !4, scope: !5, type: !6, retainedNodes: !7) +!4 = !DIFile(filename: "fnptr.cc", directory: ".") +!5 = !DIFile(filename: "fnptr.cc", directory: ".") +!6 = !DISubroutineType(types: !7) +!7 = !{} +!8 = !DILocation(line: 9, column: 14, scope: !3) +!9 = !DILocation(line: 13, column: 3, scope: !10) +!10 = distinct !DISubprogram(name: "bar", line: 12, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, unit: !26, scopeLine: 12, file: !4, scope: !5, type: !6, retainedNodes: !7) +!11 = !DILocation(line: 13, column: 14, scope: !10) +!12 = !DILocation(line: 19, column: 3, scope: !13) +!13 = distinct !DISubprogram(name: "main", line: 16, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, unit: !26, scopeLine: 16, file: !4, scope: !5, type: !6, retainedNodes: !7) +!14 = !DILocation(line: 20, column: 5, scope: !13) +!15 = !DILocation(line: 21, column: 15, scope: !13) +!16 = !DILocation(line: 22, column: 11, scope: !13) +!18 = !DILocation(line: 23, column: 14, scope: !13) +!19 = !DILocation(line: 25, column: 14, scope: !13) +!20 = !DILocation(line: 20, column: 28, scope: !13) +!22 = !DILocation(line: 19, column: 26, scope: !13) +!24 = !DILocation(line: 27, column: 3, scope: !13) +!25 = !DILocation(line: 28, column: 3, scope: !13) +!26 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: FullDebug, file: !4) diff --git a/llvm/test/Transforms/SampleProfile/function_metadata.ll b/llvm/test/Transforms/SampleProfile/function_metadata.ll new file mode 100644 index 00000000000..0e772e86f72 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/function_metadata.ll @@ -0,0 +1,59 @@ +; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/function_metadata.prof -S | FileCheck %s +; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file=%S/Inputs/function_metadata.compact.afdo -S | FileCheck %s + +; Tests whether the functions in the inline stack are added to the +; function_entry_count metadata. + +declare void @foo() + +define void @foo_available() !dbg !11 { + ret void +} + +; CHECK: define void @test({{.*}} !prof ![[ENTRY_TEST:[0-9]+]] +define void @test(void ()*) !dbg !7 { + %2 = alloca void ()* + store void ()* %0, void ()** %2 + %3 = load void ()*, void ()** %2 + ; CHECK: call {{.*}}, !prof ![[PROF:[0-9]+]] + call void @foo(), !dbg !18 + call void %3(), !dbg !19 + ret void +} + +; CHECK: define void @test_liveness({{.*}} !prof ![[ENTRY_TEST_LIVENESS:[0-9]+]] +define void @test_liveness() !dbg !12 { + call void @foo(), !dbg !20 + ret void +} + +; GUIDs of foo, bar, foo1, foo2 and foo3 should be included in the metadata to +; make sure hot inline stacks are imported. The total count of baz is lower +; than the hot cutoff threshold and its GUID should not be included in the +; metadata. +; CHECK: ![[ENTRY_TEST]] = !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7682762345278052905, i64 -7908226060800700466, i64 -2012135647395072713} + +; Check GUIDs for both foo and foo_available are included in the metadata to +; make sure the liveness analysis can capture the dependency from test_liveness +; to foo_available. +; CHECK: ![[ENTRY_TEST_LIVENESS]] = !{!"function_entry_count", i64 1, i64 4005816710939881937, i64 6699318081062747564} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8, !9} +!llvm.ident = !{!10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "calls.cc", directory: ".") +!2 = !{} +!6 = !DISubroutineType(types: !2) +!7 = distinct !DISubprogram(name: "test", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !1, type: !6, retainedNodes: !2) +!8 = !{i32 2, !"Dwarf Version", i32 4} +!9 = !{i32 1, !"Debug Info Version", i32 3} +!10 = !{!"clang version 3.5 "} +!11 = distinct !DISubprogram(name: "foo_available", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !1, type: !6, retainedNodes: !2) +!12 = distinct !DISubprogram(name: "test_liveness", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !1, type: !6, retainedNodes: !2) +!15 = !DILexicalBlockFile(discriminator: 1, file: !1, scope: !7) +!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7) +!18 = !DILocation(line: 10, scope: !17) +!19 = !DILocation(line: 11, scope: !17) +!20 = !DILocation(line: 8, scope: !12) diff --git a/llvm/test/Transforms/SampleProfile/gcc-simple.ll b/llvm/test/Transforms/SampleProfile/gcc-simple.ll new file mode 100644 index 00000000000..23e990fcd24 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/gcc-simple.ll @@ -0,0 +1,218 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/gcc-simple.afdo -S | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/gcc-simple.afdo -S | FileCheck %s +; XFAIL: powerpc-, powerpc64-, s390x, mips-, mips64-, sparc +; Original code: +; +; #include <stdlib.h> +; +; long long int foo(long i) { +; if (rand() < 500) return 2; else if (rand() > 5000) return 10; else return 90; +; } +; +; int main() { +; long long int sum = 0; +; for (int k = 0; k < 3000; k++) +; for (int i = 0; i < 200000; i++) sum += foo(i); +; return sum > 0 ? 0 : 1; +; } +; +; This test was compiled down to bytecode at -O0 to avoid inlining foo() into +; main(). The profile was generated using a GCC-generated binary (also compiled +; at -O0). The conversion from the Linux Perf profile to the GCC autofdo +; profile used the converter at https://github.com/google/autofdo +; +; $ gcc -g -O0 gcc-simple.cc -o gcc-simple +; $ perf record -b ./gcc-simple +; $ create_gcov --binary=gcc-simple --gcov=gcc-simple.afdo + +define i64 @_Z3fool(i64 %i) #0 !dbg !4 { +; CHECK: !prof ![[EC1:[0-9]+]] +entry: + %retval = alloca i64, align 8 + %i.addr = alloca i64, align 8 + store i64 %i, i64* %i.addr, align 8 + call void @llvm.dbg.declare(metadata i64* %i.addr, metadata !16, metadata !17), !dbg !18 + %call = call i32 @rand() #3, !dbg !19 + %cmp = icmp slt i32 %call, 500, !dbg !21 + br i1 %cmp, label %if.then, label %if.else, !dbg !22 +; CHECK: !prof ![[PROF1:[0-9]+]] + +if.then: ; preds = %entry + store i64 2, i64* %retval, align 8, !dbg !23 + br label %return, !dbg !23 + +if.else: ; preds = %entry + %call1 = call i32 @rand() #3, !dbg !25 + %cmp2 = icmp sgt i32 %call1, 5000, !dbg !28 + br i1 %cmp2, label %if.then.3, label %if.else.4, !dbg !29 +; CHECK: !prof ![[PROF2:[0-9]+]] + +if.then.3: ; preds = %if.else + store i64 10, i64* %retval, align 8, !dbg !30 + br label %return, !dbg !30 + +if.else.4: ; preds = %if.else + store i64 90, i64* %retval, align 8, !dbg !32 + br label %return, !dbg !32 + +return: ; preds = %if.else.4, %if.then.3, %if.then + %0 = load i64, i64* %retval, align 8, !dbg !34 + ret i64 %0, !dbg !34 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: nounwind +declare i32 @rand() #2 + +; Function Attrs: nounwind uwtable +define i32 @main() #0 !dbg !9 { +; CHECK: !prof ![[EC2:[0-9]+]] +entry: + %retval = alloca i32, align 4 + %sum = alloca i64, align 8 + %k = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval, align 4 + call void @llvm.dbg.declare(metadata i64* %sum, metadata !35, metadata !17), !dbg !36 + store i64 0, i64* %sum, align 8, !dbg !36 + call void @llvm.dbg.declare(metadata i32* %k, metadata !37, metadata !17), !dbg !39 + store i32 0, i32* %k, align 4, !dbg !39 + br label %for.cond, !dbg !40 + +for.cond: ; preds = %for.inc.4, %entry + %0 = load i32, i32* %k, align 4, !dbg !41 + %cmp = icmp slt i32 %0, 3000, !dbg !45 + br i1 %cmp, label %for.body, label %for.end.6, !dbg !46 +; CHECK: !prof ![[PROF3:[0-9]+]] + +for.body: ; preds = %for.cond + call void @llvm.dbg.declare(metadata i32* %i, metadata !47, metadata !17), !dbg !49 + store i32 0, i32* %i, align 4, !dbg !49 + br label %for.cond.1, !dbg !50 + +for.cond.1: ; preds = %for.inc, %for.body + %1 = load i32, i32* %i, align 4, !dbg !51 + %cmp2 = icmp slt i32 %1, 200000, !dbg !55 + br i1 %cmp2, label %for.body.3, label %for.end, !dbg !56 +; CHECK: !prof ![[PROF4:[0-9]+]] + +for.body.3: ; preds = %for.cond.1 + %2 = load i32, i32* %i, align 4, !dbg !57 + %conv = sext i32 %2 to i64, !dbg !57 + %call = call i64 @_Z3fool(i64 %conv), !dbg !59 + %3 = load i64, i64* %sum, align 8, !dbg !60 + %add = add nsw i64 %3, %call, !dbg !60 + store i64 %add, i64* %sum, align 8, !dbg !60 + br label %for.inc, !dbg !61 + +for.inc: ; preds = %for.body.3 + %4 = load i32, i32* %i, align 4, !dbg !62 + %inc = add nsw i32 %4, 1, !dbg !62 + store i32 %inc, i32* %i, align 4, !dbg !62 + br label %for.cond.1, !dbg !64 + +for.end: ; preds = %for.cond.1 + br label %for.inc.4, !dbg !65 + +for.inc.4: ; preds = %for.end + %5 = load i32, i32* %k, align 4, !dbg !67 + %inc5 = add nsw i32 %5, 1, !dbg !67 + store i32 %inc5, i32* %k, align 4, !dbg !67 + br label %for.cond, !dbg !68 + +for.end.6: ; preds = %for.cond + %6 = load i64, i64* %sum, align 8, !dbg !69 + %cmp7 = icmp sgt i64 %6, 0, !dbg !70 + %cond = select i1 %cmp7, i32 0, i32 1, !dbg !69 + ret i32 %cond, !dbg !71 +} + +; CHECK ![[EC1]] = !{!"function_entry_count", i64 24108} +; CHECK ![[PROF1]] = !{!"branch_weights", i32 1, i32 30124} +; CHECK ![[PROF2]] = !{!"branch_weights", i32 30177, i32 29579} +; CHECK ![[EC2]] = !{!"function_entry_count", i64 0} +; CHECK ![[PROF3]] = !{!"branch_weights", i32 1, i32 1} +; CHECK ![[PROF4]] = !{!"branch_weights", i32 1, i32 20238} + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!13, !14} +!llvm.ident = !{!15} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 247554) (llvm/trunk 247557)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "discriminator.cc", directory: "/usr/local/google/home/dnovillo/llvm/test/autofdo") +!2 = !{} +!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{!7, !8} +!7 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed) +!8 = !DIBasicType(name: "long int", size: 64, align: 64, encoding: DW_ATE_signed) +!9 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !10, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) +!10 = !DISubroutineType(types: !11) +!11 = !{!12} +!12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!13 = !{i32 2, !"Dwarf Version", i32 4} +!14 = !{i32 2, !"Debug Info Version", i32 3} +!15 = !{!"clang version 3.8.0 (trunk 247554) (llvm/trunk 247557)"} +!16 = !DILocalVariable(name: "i", arg: 1, scope: !4, file: !1, line: 3, type: !8) +!17 = !DIExpression() +!18 = !DILocation(line: 3, column: 24, scope: !4) +!19 = !DILocation(line: 4, column: 7, scope: !20) +!20 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 7) +!21 = !DILocation(line: 4, column: 14, scope: !20) +!22 = !DILocation(line: 4, column: 7, scope: !4) +!23 = !DILocation(line: 4, column: 21, scope: !24) +!24 = !DILexicalBlockFile(scope: !20, file: !1, discriminator: 1) +!25 = !DILocation(line: 4, column: 40, scope: !26) +!26 = !DILexicalBlockFile(scope: !27, file: !1, discriminator: 2) +!27 = distinct !DILexicalBlock(scope: !20, file: !1, line: 4, column: 40) +!28 = !DILocation(line: 4, column: 47, scope: !27) +!29 = !DILocation(line: 4, column: 40, scope: !20) +!30 = !DILocation(line: 4, column: 55, scope: !31) +!31 = !DILexicalBlockFile(scope: !27, file: !1, discriminator: 3) +!32 = !DILocation(line: 4, column: 71, scope: !33) +!33 = !DILexicalBlockFile(scope: !27, file: !1, discriminator: 4) +!34 = !DILocation(line: 5, column: 1, scope: !4) +!35 = !DILocalVariable(name: "sum", scope: !9, file: !1, line: 8, type: !7) +!36 = !DILocation(line: 8, column: 17, scope: !9) +!37 = !DILocalVariable(name: "k", scope: !38, file: !1, line: 9, type: !12) +!38 = distinct !DILexicalBlock(scope: !9, file: !1, line: 9, column: 3) +!39 = !DILocation(line: 9, column: 12, scope: !38) +!40 = !DILocation(line: 9, column: 8, scope: !38) +!41 = !DILocation(line: 9, column: 19, scope: !42) +!42 = !DILexicalBlockFile(scope: !43, file: !1, discriminator: 2) +!43 = !DILexicalBlockFile(scope: !44, file: !1, discriminator: 1) +!44 = distinct !DILexicalBlock(scope: !38, file: !1, line: 9, column: 3) +!45 = !DILocation(line: 9, column: 21, scope: !44) +!46 = !DILocation(line: 9, column: 3, scope: !38) +!47 = !DILocalVariable(name: "i", scope: !48, file: !1, line: 10, type: !12) +!48 = distinct !DILexicalBlock(scope: !44, file: !1, line: 10, column: 5) +!49 = !DILocation(line: 10, column: 14, scope: !48) +!50 = !DILocation(line: 10, column: 10, scope: !48) +!51 = !DILocation(line: 10, column: 21, scope: !52) +!52 = !DILexicalBlockFile(scope: !53, file: !1, discriminator: 5) +!53 = !DILexicalBlockFile(scope: !54, file: !1, discriminator: 1) +!54 = distinct !DILexicalBlock(scope: !48, file: !1, line: 10, column: 5) +!55 = !DILocation(line: 10, column: 23, scope: !54) +!56 = !DILocation(line: 10, column: 5, scope: !48) +!57 = !DILocation(line: 10, column: 49, scope: !58) +!58 = !DILexicalBlockFile(scope: !54, file: !1, discriminator: 2) +!59 = !DILocation(line: 10, column: 45, scope: !54) +!60 = !DILocation(line: 10, column: 42, scope: !54) +!61 = !DILocation(line: 10, column: 38, scope: !54) +!62 = !DILocation(line: 10, column: 34, scope: !63) +!63 = !DILexicalBlockFile(scope: !54, file: !1, discriminator: 4) +!64 = !DILocation(line: 10, column: 5, scope: !54) +!65 = !DILocation(line: 10, column: 50, scope: !66) +!66 = !DILexicalBlockFile(scope: !48, file: !1, discriminator: 3) +!67 = !DILocation(line: 9, column: 30, scope: !44) +!68 = !DILocation(line: 9, column: 3, scope: !44) +!69 = !DILocation(line: 11, column: 10, scope: !9) +!70 = !DILocation(line: 11, column: 14, scope: !9) +!71 = !DILocation(line: 11, column: 3, scope: !9) diff --git a/llvm/test/Transforms/SampleProfile/indirect-call-gcc.ll b/llvm/test/Transforms/SampleProfile/indirect-call-gcc.ll new file mode 100644 index 00000000000..678c7931250 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/indirect-call-gcc.ll @@ -0,0 +1,26 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call.afdo -S | FileCheck %s + +; Checks if indirect call targets are read correctly when reading from gcc +; format profile. +; It is expected to fail on certain architectures as gcc profile reader does +; not work. +; XFAIL: powerpc64-, s390x, mips-, mips64-, sparc + +define void @test(void ()*) !dbg !3 { + %2 = alloca void ()* + store void ()* %0, void ()** %2 + %3 = load void ()*, void ()** %2 + ; CHECK: call {{.*}}, !prof ![[PROF:[0-9]+]] + call void %3(), !dbg !4 + ret void +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1) +!1 = !DIFile(filename: "test.cc", directory: "/") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, unit: !0) +!4 = !DILocation(line: 5, scope: !3) +; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 3457, i64 9191153033785521275, i64 2059, i64 -1069303473483922844, i64 1398} diff --git a/llvm/test/Transforms/SampleProfile/indirect-call.ll b/llvm/test/Transforms/SampleProfile/indirect-call.ll new file mode 100644 index 00000000000..95d0c473ae7 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/indirect-call.ll @@ -0,0 +1,213 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call.prof -S | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call.compact.afdo -S | FileCheck %s + +; CHECK-LABEL: @test +define void @test(void ()*) !dbg !3 { + %2 = alloca void ()* + store void ()* %0, void ()** %2 + %3 = load void ()*, void ()** %2 + ; CHECK: call {{.*}}, !prof ![[PROF:[0-9]+]] + call void %3(), !dbg !4 + ret void +} + +; CHECK-LABEL: @test_inline +; If the indirect call is promoted and inlined in profile, we should promote and inline it. +define void @test_inline(i64* (i32*)*, i32* %x) !dbg !6 { + %2 = alloca i64* (i32*)* + store i64* (i32*)* %0, i64* (i32*)** %2 + %3 = load i64* (i32*)*, i64* (i32*)** %2 +; CHECK: icmp {{.*}} @foo_inline2 +; CHECK: br {{.*}} !prof ![[BR1:[0-9]+]] +; CHECK: if.true.direct_targ: +; CHECK-NOT: call +; CHECK: if.false.orig_indirect: +; CHECK: icmp {{.*}} @foo_inline1 +; CHECK: br {{.*}} !prof ![[BR2:[0-9]+]] +; CHECK: if.true.direct_targ1: +; CHECK-NOT: call +; CHECK: if.false.orig_indirect2: +; CHECK: call {{.*}} !prof ![[VP:[0-9]+]] + call i64* %3(i32* %x), !dbg !7 + ret void +} + +; CHECK-LABEL: @test_inline_strip +; If the indirect call is promoted and inlined in profile, and the callee name +; is stripped we should promote and inline it. +define void @test_inline_strip(i64* (i32*)*, i32* %x) !dbg !8 { + %2 = alloca i64* (i32*)* + store i64* (i32*)* %0, i64* (i32*)** %2 + %3 = load i64* (i32*)*, i64* (i32*)** %2 +; CHECK: icmp {{.*}} @foo_inline_strip.suffix +; CHECK: if.true.direct_targ: +; CHECK-NOT: call +; CHECK: if.false.orig_indirect: +; CHECK: call + call i64* %3(i32* %x), !dbg !9 + ret void +} + +; CHECK-LABEL: @test_inline_strip_conflict +; If the indirect call is promoted and inlined in profile, and the callee name +; is stripped, but have more than 1 potential match, we should not promote. +define void @test_inline_strip_conflict(i64* (i32*)*, i32* %x) !dbg !10 { + %2 = alloca i64* (i32*)* + store i64* (i32*)* %0, i64* (i32*)** %2 + %3 = load i64* (i32*)*, i64* (i32*)** %2 +; CHECK-NOT: if.true.direct_targ: + call i64* %3(i32* %x), !dbg !11 + ret void +} + +; CHECK-LABEL: @test_noinline +; If the indirect call target is not available, we should not promote it. +define void @test_noinline(void ()*) !dbg !12 { + %2 = alloca void ()* + store void ()* %0, void ()** %2 + %3 = load void ()*, void ()** %2 +; CHECK-NOT: icmp +; CHECK: call + call void %3(), !dbg !13 + ret void +} + +; CHECK-LABEL: @test_noinline_bitcast +; If the indirect call has been promoted to a direct call with bitcast, +; do not inline it. +define float @test_noinline_bitcast(float ()*) !dbg !26 { + %2 = alloca float ()* + store float ()* %0, float ()** %2 +; CHECK: icmp +; CHECK: call + %3 = load float ()*, float ()** %2 + %4 = call float %3(), !dbg !27 + ret float %4 +} + +; CHECK-LABEL: @test_norecursive_inline +; If the indirect call target is the caller, we should not promote it. +define void @test_norecursive_inline() !dbg !24 { +; CHECK-NOT: icmp +; CHECK: call + %1 = load void ()*, void ()** @y, align 8 + call void %1(), !dbg !25 + ret void +} + +define i32* @return_arg(i32* readnone returned) !dbg !29{ + ret i32* %0 +} + +; CHECK-LABEL: @return_arg_caller +; When the promoted indirect call returns a parameter that was defined by the +; return value of a previous direct call. Checks both direct call and promoted +; indirect call are inlined. +define i32* @return_arg_caller(i32* (i32*)* nocapture) !dbg !30{ +; CHECK-NOT: call i32* @foo_inline1 +; CHECK: if.true.direct_targ: +; CHECK-NOT: call +; CHECK: if.false.orig_indirect: +; CHECK: call + %2 = call i32* @foo_inline1(i32* null), !dbg !31 + %cmp = icmp ne i32* %2, null + br i1 %cmp, label %then, label %else + +then: + %3 = tail call i32* %0(i32* %2), !dbg !32 + ret i32* %3 + +else: + ret i32* null +} + +@x = global i32 0, align 4 +@y = global void ()* null, align 8 + +define i32* @foo_inline1(i32* %x) !dbg !14 { + ret i32* %x +} + +define i32* @foo_inline_strip.suffix(i32* %x) !dbg !15 { + ret i32* %x +} + +define i32* @foo_inline_strip_conflict.suffix1(i32* %x) !dbg !16 { + ret i32* %x +} + +define i32* @foo_inline_strip_conflict.suffix2(i32* %x) !dbg !17 { + ret i32* %x +} + +define i32* @foo_inline_strip_conflict.suffix3(i32* %x) !dbg !18 { + ret i32* %x +} + +define i32* @foo_inline2(i32* %x) !dbg !19 { + ret i32* %x +} + +define i32 @foo_noinline(i32 %x) !dbg !20 { + ret i32 %x +} + +define void @foo_direct() !dbg !21 { + ret void +} + +define i32 @foo_direct_i32() !dbg !28 { + ret i32 0; +} + +; CHECK-LABEL: @test_direct +; We should not promote a direct call. +define void @test_direct() !dbg !22 { +; CHECK-NOT: icmp +; CHECK: call + call void @foo_alias(), !dbg !23 + ret void +} + +@foo_alias = alias void (), void ()* @foo_direct + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1) +!1 = !DIFile(filename: "test.cc", directory: "/") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 3, unit: !0) +!4 = !DILocation(line: 4, scope: !3) +!5 = !DILocation(line: 6, scope: !3) +; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 3457, i64 9191153033785521275, i64 2059, i64 -1069303473483922844, i64 1398} +; CHECK: ![[BR1]] = !{!"branch_weights", i32 4000, i32 4000} +; CHECK: ![[BR2]] = !{!"branch_weights", i32 3000, i32 1000} +; CHECK: ![[VP]] = !{!"VP", i32 0, i64 8000, i64 -6391416044382067764, i64 1000} +!6 = distinct !DISubprogram(name: "test_inline", scope: !1, file: !1, line: 6, unit: !0) +!7 = !DILocation(line: 7, scope: !6) +!8 = distinct !DISubprogram(name: "test_inline_strip", scope: !1, file: !1, line: 8, unit: !0) +!9 = !DILocation(line: 9, scope: !8) +!10 = distinct !DISubprogram(name: "test_inline_strip_conflict", scope: !1, file: !1, line: 10, unit: !0) +!11 = !DILocation(line: 11, scope: !10) +!12 = distinct !DISubprogram(name: "test_noinline", scope: !1, file: !1, line: 12, unit: !0) +!13 = !DILocation(line: 13, scope: !12) +!14 = distinct !DISubprogram(name: "foo_inline1", scope: !1, file: !1, line: 11, unit: !0) +!15 = distinct !DISubprogram(name: "foo_inline_strip.suffix", scope: !1, file: !1, line: 1, unit: !0) +!16 = distinct !DISubprogram(name: "foo_inline_strip_conflict.suffix1", scope: !1, file: !1, line: 1, unit: !0) +!17 = distinct !DISubprogram(name: "foo_inline_strip_conflict.suffix2", scope: !1, file: !1, line: 1, unit: !0) +!18 = distinct !DISubprogram(name: "foo_inline_strip_conflict.suffix3", scope: !1, file: !1, line: 1, unit: !0) +!19 = distinct !DISubprogram(name: "foo_inline2", scope: !1, file: !1, line: 19, unit: !0) +!20 = distinct !DISubprogram(name: "foo_noinline", scope: !1, file: !1, line: 20, unit: !0) +!21 = distinct !DISubprogram(name: "foo_direct", scope: !1, file: !1, line: 21, unit: !0) +!22 = distinct !DISubprogram(name: "test_direct", scope: !1, file: !1, line: 22, unit: !0) +!23 = !DILocation(line: 23, scope: !22) +!24 = distinct !DISubprogram(name: "test_norecursive_inline", scope: !1, file: !1, line: 12, unit: !0) +!25 = !DILocation(line: 13, scope: !24) +!26 = distinct !DISubprogram(name: "test_noinline_bitcast", scope: !1, file: !1, line: 12, unit: !0) +!27 = !DILocation(line: 13, scope: !26) +!28 = distinct !DISubprogram(name: "foo_direct_i32", scope: !1, file: !1, line: 11, unit: !0) +!29 = distinct !DISubprogram(name: "return_arg", scope: !1, file: !1, line: 11, unit: !0) +!30 = distinct !DISubprogram(name: "return_arg_caller", scope: !1, file: !1, line: 11, unit: !0) +!31 = !DILocation(line: 12, scope: !30) +!32 = !DILocation(line: 13, scope: !30) diff --git a/llvm/test/Transforms/SampleProfile/inline-act.ll b/llvm/test/Transforms/SampleProfile/inline-act.ll new file mode 100644 index 00000000000..f5438701146 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/inline-act.ll @@ -0,0 +1,72 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-act.prof + +; Sample profile should have non-empty ACT passed to inliner + +; int t; +; bool foo(int value) { +; switch(value) { +; case 0: +; case 1: +; case 3: +; return true; +; default: +; return false; +; } +; } +; void bar(int i) { +; if (foo(i)) +; t *= 2; +; } + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@t = global i32 0, align 4 + +; Function Attrs: nounwind uwtable +define zeroext i1 @_Z3fooi(i32) #0 { + %switch.tableidx = sub i32 %0, 0 + %2 = icmp ult i32 %switch.tableidx, 4 + br i1 %2, label %switch.lookup, label %3 + +switch.lookup: ; preds = %1 + %switch.cast = trunc i32 %switch.tableidx to i4 + %switch.shiftamt = mul i4 %switch.cast, 1 + %switch.downshift = lshr i4 -5, %switch.shiftamt + %switch.masked = trunc i4 %switch.downshift to i1 + ret i1 %switch.masked + +; <label>:3: ; preds = %1 + ret i1 false +} + +; Function Attrs: nounwind uwtable +define void @_Z3bari(i32) #0 !dbg !9 { + %2 = call zeroext i1 @_Z3fooi(i32 %0), !dbg !10 + br i1 %2, label %3, label %6, !dbg !10 + +; <label>:3: ; preds = %1 + %4 = load i32, i32* @t, align 4 + %5 = shl nsw i32 %4, 1 + store i32 %5, i32* @t, align 4 + br label %6 + +; <label>:6: ; preds = %3, %1 + ret void +} + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3} +!llvm.ident = !{!4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.9.0 (trunk 272227) (llvm/trunk 272226)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "test.cc", directory: "./") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{!"clang version 3.9.0 (trunk 272227) (llvm/trunk 272226)"} +!6 = !DISubroutineType(types: !2) +!9 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 14, type: !6, isLocal: false, isDefinition: true, scopeLine: 14, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) +!10 = !DILocation(line: 15, column: 7, scope: !9) +!11 = !DILocation(line: 16, column: 7, scope: !9) diff --git a/llvm/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll b/llvm/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll new file mode 100644 index 00000000000..de2fa7cdcbf --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll @@ -0,0 +1,31 @@ +; For SamplePGO, if -profile-sample-accurate is specified, cold callsite +; heuristics should be honored if the caller has no profile. + +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -profile-sample-accurate -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s --check-prefix ACCURATE + +declare void @extern() +define void @callee() { + call void @extern() + ret void +} + +define void @caller(i32 %y1) { +; CHECK-LABEL: @caller +; CHECK-NOT: call void @callee +; ACCURATE-LABEL: @caller +; ACCURATE: call void @callee + call void @callee() + ret void +} + +define void @caller_accurate(i32 %y1) #0 { +; CHECK-LABEL: @caller_accurate +; CHECK: call void @callee +; ACCURATE-LABEL: @caller_accurate +; ACCURATE: call void @callee + call void @callee() + ret void +} + +attributes #0 = { "profile-sample-accurate" } diff --git a/llvm/test/Transforms/SampleProfile/inline-combine.ll b/llvm/test/Transforms/SampleProfile/inline-combine.ll new file mode 100644 index 00000000000..cdbaf3d646d --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/inline-combine.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/inline-combine.prof -S | FileCheck %s +; RUN: opt < %s -passes="function(instcombine),sample-profile" -sample-profile-file=%S/Inputs/inline-combine.prof -S | FileCheck %s + +%"class.llvm::FoldingSetNodeID" = type { %"class.llvm::SmallVector" } +%"class.llvm::SmallVector" = type { %"class.llvm::SmallVectorImpl.base", %"struct.llvm::SmallVectorStorage" } +%"class.llvm::SmallVectorImpl.base" = type { %"class.llvm::SmallVectorTemplateBase.base" } +%"class.llvm::SmallVectorTemplateBase.base" = type { %"class.llvm::SmallVectorTemplateCommon.base" } +%"class.llvm::SmallVectorTemplateCommon.base" = type <{ %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion" }> +%"class.llvm::SmallVectorBase" = type { i8*, i8*, i8* } +%"struct.llvm::AlignedCharArrayUnion" = type { %"struct.llvm::AlignedCharArray" } +%"struct.llvm::AlignedCharArray" = type { [4 x i8] } +%"struct.llvm::SmallVectorStorage" = type { [31 x %"struct.llvm::AlignedCharArrayUnion"] } +%"class.llvm::SmallVectorImpl" = type { %"class.llvm::SmallVectorTemplateBase.base", [4 x i8] } + +$foo = comdat any + +$bar = comdat any + +define void @foo(%"class.llvm::FoldingSetNodeID"* %this) comdat align 2 !dbg !3 { + %1 = alloca %"class.llvm::FoldingSetNodeID"*, align 8 + store %"class.llvm::FoldingSetNodeID"* %this, %"class.llvm::FoldingSetNodeID"** %1, align 8 + %2 = load %"class.llvm::FoldingSetNodeID"*, %"class.llvm::FoldingSetNodeID"** %1, align 8 + %3 = getelementptr inbounds %"class.llvm::FoldingSetNodeID", %"class.llvm::FoldingSetNodeID"* %2, i32 0, i32 0 +; the call should have been inlined after sample-profile pass +; CHECK-NOT: call + call void bitcast (void (%"class.llvm::SmallVectorImpl"*)* @bar to void (%"class.llvm::SmallVector"*)*)(%"class.llvm::SmallVector"* %3), !dbg !7 + ret void +} + +define void @bar(%"class.llvm::SmallVectorImpl"* %this) comdat align 2 !dbg !8 { + ret void +} + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} +!llvm.dbg.cu = !{!9} + +!0 = !{i32 2, !"Dwarf Version", i32 4} +!1 = !{i32 1, !"Debug Info Version", i32 3} +!2 = !{!"clang version 3.5 "} +!3 = distinct !DISubprogram(name: "foo", scope: !4, file: !4, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !9, retainedNodes: !6) +!4 = !DIFile(filename: "test.cc", directory: ".") +!5 = !DISubroutineType(types: !6) +!6 = !{} +!7 = !DILocation(line: 4, scope: !3) +!8 = distinct !DISubprogram(name: "bar", scope: !4, file: !4, line: 7, type: !5, isLocal: false, isDefinition: true, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !9, retainedNodes: !6) +!9 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: FullDebug, file: !4) diff --git a/llvm/test/Transforms/SampleProfile/inline-coverage.ll b/llvm/test/Transforms/SampleProfile/inline-coverage.ll new file mode 100644 index 00000000000..7e189545190 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/inline-coverage.ll @@ -0,0 +1,135 @@ +; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/inline-coverage.prof -sample-profile-check-record-coverage=100 -sample-profile-check-sample-coverage=110 -pass-remarks=sample-profile -pass-remarks-analysis=sample-profile -o /dev/null 2>&1 | FileCheck %s +; RUN: opt < %s -passes="function(instcombine),sample-profile" -sample-profile-file=%S/Inputs/inline-coverage.prof -sample-profile-check-record-coverage=100 -sample-profile-check-sample-coverage=110 -pass-remarks=sample-profile -pass-remarks-analysis=sample-profile -o /dev/null 2>&1 | FileCheck %s +; +; Original code: +; +; 1 #include <stdlib.h> +; 2 +; 3 long long int foo(long i) { +; 4 return rand() * i; +; 5 } +; 6 +; 7 int main() { +; 8 long long int sum = 0; +; 9 for (int i = 0; i < 200000 * 3000; i++) +; 10 sum += foo(i); +; 11 return sum > 0 ? 0 : 1; +; 12 } +; +; CHECK: remark: coverage.cc:10:12: inlined hot callee '_Z3fool' into 'main' +; CHECK: remark: coverage.cc:9:21: Applied 23478 samples from profile (offset: 2.1) +; CHECK: remark: coverage.cc:10:16: Applied 23478 samples from profile (offset: 3) +; CHECK: remark: coverage.cc:4:10: Applied 31878 samples from profile (offset: 1) +; CHECK: remark: coverage.cc:11:10: Applied 0 samples from profile (offset: 4) +; CHECK: remark: coverage.cc:10:16: most popular destination for conditional branches at coverage.cc:9:3 +; +; There is one sample record with 0 samples at offset 4 in main() that we never +; use: +; CHECK: warning: coverage.cc:7: 4 of 5 available profile records (80%) were applied +; +; Since the unused sample record contributes no samples, sample coverage should +; be 100%. Note that we get this warning because we are requesting an impossible +; 110% coverage check. +; CHECK: warning: coverage.cc:7: 78834 of 78834 available profile samples (100%) were applied + +define i64 @_Z3fool(i64 %i) !dbg !4 { +entry: + %i.addr = alloca i64, align 8 + store i64 %i, i64* %i.addr, align 8 + call void @llvm.dbg.declare(metadata i64* %i.addr, metadata !16, metadata !17), !dbg !18 + %call = call i32 @rand(), !dbg !19 + %conv = sext i32 %call to i64, !dbg !19 + %0 = load i64, i64* %i.addr, align 8, !dbg !20 + %mul = mul nsw i64 %conv, %0, !dbg !21 + ret i64 %mul, !dbg !22 +} + +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +declare i32 @rand() + +define i32 @main() !dbg !9 { +entry: + %retval = alloca i32, align 4 + %sum = alloca i64, align 8 + %i = alloca i32, align 4 + store i32 0, i32* %retval, align 4 + call void @llvm.dbg.declare(metadata i64* %sum, metadata !23, metadata !17), !dbg !24 + store i64 0, i64* %sum, align 8, !dbg !24 + call void @llvm.dbg.declare(metadata i32* %i, metadata !25, metadata !17), !dbg !27 + store i32 0, i32* %i, align 4, !dbg !27 + br label %for.cond, !dbg !28 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4, !dbg !29 + %cmp = icmp slt i32 %0, 600000000, !dbg !32 + br i1 %cmp, label %for.body, label %for.end, !dbg !33 + +for.body: ; preds = %for.cond + %1 = load i32, i32* %i, align 4, !dbg !34 + %conv = sext i32 %1 to i64, !dbg !34 + %call = call i64 @_Z3fool(i64 %conv), !dbg !35 + %2 = load i64, i64* %sum, align 8, !dbg !36 + %add = add nsw i64 %2, %call, !dbg !36 + store i64 %add, i64* %sum, align 8, !dbg !36 + br label %for.inc, !dbg !37 + +for.inc: ; preds = %for.body + %3 = load i32, i32* %i, align 4, !dbg !38 + %inc = add nsw i32 %3, 1, !dbg !38 + store i32 %inc, i32* %i, align 4, !dbg !38 + br label %for.cond, !dbg !39 + +for.end: ; preds = %for.cond + %4 = load i64, i64* %sum, align 8, !dbg !40 + %cmp1 = icmp sgt i64 %4, 0, !dbg !41 + %cond = select i1 %cmp1, i32 0, i32 1, !dbg !40 + ret i32 %cond, !dbg !42 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!13, !14} +!llvm.ident = !{!15} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 251738) (llvm/trunk 251737)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "coverage.cc", directory: ".") +!2 = !{} +!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{!7, !8} +!7 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed) +!8 = !DIBasicType(name: "long int", size: 64, align: 64, encoding: DW_ATE_signed) +!9 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !10, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) +!10 = !DISubroutineType(types: !11) +!11 = !{!12} +!12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!13 = !{i32 2, !"Dwarf Version", i32 4} +!14 = !{i32 2, !"Debug Info Version", i32 3} +!15 = !{!"clang version 3.8.0 (trunk 251738) (llvm/trunk 251737)"} +!16 = !DILocalVariable(name: "i", arg: 1, scope: !4, file: !1, line: 3, type: !8) +!17 = !DIExpression() +!18 = !DILocation(line: 3, column: 24, scope: !4) +!19 = !DILocation(line: 4, column: 10, scope: !4) +!20 = !DILocation(line: 4, column: 19, scope: !4) +!21 = !DILocation(line: 4, column: 17, scope: !4) +!22 = !DILocation(line: 4, column: 3, scope: !4) +!23 = !DILocalVariable(name: "sum", scope: !9, file: !1, line: 8, type: !7) +!24 = !DILocation(line: 8, column: 17, scope: !9) +!25 = !DILocalVariable(name: "i", scope: !26, file: !1, line: 9, type: !12) +!26 = distinct !DILexicalBlock(scope: !9, file: !1, line: 9, column: 3) +!27 = !DILocation(line: 9, column: 12, scope: !26) +!28 = !DILocation(line: 9, column: 8, scope: !26) +!29 = !DILocation(line: 9, column: 19, scope: !30) +!30 = !DILexicalBlockFile(scope: !31, file: !1, discriminator: 2) +!31 = distinct !DILexicalBlock(scope: !26, file: !1, line: 9, column: 3) +!32 = !DILocation(line: 9, column: 21, scope: !30) +!33 = !DILocation(line: 9, column: 3, scope: !30) +!34 = !DILocation(line: 10, column: 16, scope: !31) +!35 = !DILocation(line: 10, column: 12, scope: !31) +!36 = !DILocation(line: 10, column: 9, scope: !31) +!37 = !DILocation(line: 10, column: 5, scope: !31) +!38 = !DILocation(line: 9, column: 39, scope: !31) +!39 = !DILocation(line: 9, column: 3, scope: !31) +!40 = !DILocation(line: 11, column: 10, scope: !9) +!41 = !DILocation(line: 11, column: 14, scope: !9) +!42 = !DILocation(line: 11, column: 3, scope: !9) diff --git a/llvm/test/Transforms/SampleProfile/inline.ll b/llvm/test/Transforms/SampleProfile/inline.ll new file mode 100644 index 00000000000..bd7b024c553 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/inline.ll @@ -0,0 +1,109 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s + +; Original C++ test case +; +; #include <stdio.h> +; +; int sum(int x, int y) { +; return x + y; +; } +; +; int main() { +; int s, i = 0; +; while (i++ < 20000 * 20000) +; if (i != 100) s = sum(i, s); else s = 30; +; printf("sum is %d\n", s); +; return 0; +; } +; +@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 + +; Function Attrs: nounwind uwtable +define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %0 = load i32, i32* %x.addr, align 4, !dbg !11 + %1 = load i32, i32* %y.addr, align 4, !dbg !11 + %add = add nsw i32 %0, %1, !dbg !11 + ret i32 %add, !dbg !11 +} + +; Function Attrs: uwtable +define i32 @main() !dbg !7 { +entry: + %retval = alloca i32, align 4 + %s = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %i, align 4, !dbg !12 + br label %while.cond, !dbg !13 + +while.cond: ; preds = %if.end, %entry + %0 = load i32, i32* %i, align 4, !dbg !14 + %inc = add nsw i32 %0, 1, !dbg !14 + store i32 %inc, i32* %i, align 4, !dbg !14 + %cmp = icmp slt i32 %0, 400000000, !dbg !14 + br i1 %cmp, label %while.body, label %while.end, !dbg !14 + +while.body: ; preds = %while.cond + %1 = load i32, i32* %i, align 4, !dbg !16 + %cmp1 = icmp ne i32 %1, 100, !dbg !16 + br i1 %cmp1, label %if.then, label %if.else, !dbg !16 + + +if.then: ; preds = %while.body + %2 = load i32, i32* %i, align 4, !dbg !18 + %3 = load i32, i32* %s, align 4, !dbg !18 + %call = call i32 @_Z3sumii(i32 %2, i32 %3), !dbg !18 +; CHECK-NOT: call i32 @_Z3sumii + store i32 %call, i32* %s, align 4, !dbg !18 + br label %if.end, !dbg !18 + +if.else: ; preds = %while.body + store i32 30, i32* %s, align 4, !dbg !20 + br label %if.end + +if.end: ; preds = %if.else, %if.then + br label %while.cond, !dbg !22 + +while.end: ; preds = %while.cond + %4 = load i32, i32* %s, align 4, !dbg !24 + %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24 + ret i32 0, !dbg !25 +} + +declare i32 @printf(i8*, ...) #2 + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8, !9} +!llvm.ident = !{!10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "calls.cc", directory: ".") +!2 = !{} +!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2) +!5 = !DIFile(filename: "calls.cc", directory: ".") +!6 = !DISubroutineType(types: !2) +!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2) +!8 = !{i32 2, !"Dwarf Version", i32 4} +!9 = !{i32 1, !"Debug Info Version", i32 3} +!10 = !{!"clang version 3.5 "} +!11 = !DILocation(line: 4, scope: !4) +!12 = !DILocation(line: 8, scope: !7) +!13 = !DILocation(line: 9, scope: !7) +!14 = !DILocation(line: 9, scope: !15) +!15 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !7) +!16 = !DILocation(line: 10, scope: !17) +!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7) +!18 = !DILocation(line: 10, scope: !19) +!19 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17) +!20 = !DILocation(line: 10, scope: !21) +!21 = !DILexicalBlockFile(discriminator: 4, file: !1, scope: !17) +!22 = !DILocation(line: 10, scope: !23) +!23 = !DILexicalBlockFile(discriminator: 6, file: !1, scope: !17) +!24 = !DILocation(line: 11, scope: !7) +!25 = !DILocation(line: 12, scope: !7) diff --git a/llvm/test/Transforms/SampleProfile/nodebug.ll b/llvm/test/Transforms/SampleProfile/nodebug.ll new file mode 100644 index 00000000000..d1c53c1a6f2 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/nodebug.ll @@ -0,0 +1,20 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/nodebug.prof + +define void @foo() !dbg !3 { + call void @bar(), !dbg !4 + ret void +} + +define void @bar() { + call void @bar() + ret void +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1) +!1 = !DIFile(filename: "t", directory: "/tmp/") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "a", scope: !1, file: !1, line: 10, unit: !0) +!4 = !DILocation(line: 10, scope: !3) diff --git a/llvm/test/Transforms/SampleProfile/nolocinfo.ll b/llvm/test/Transforms/SampleProfile/nolocinfo.ll new file mode 100644 index 00000000000..ba4a78e3d84 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/nolocinfo.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/nolocinfo.prof -S -pass-remarks=sample-profile 2>&1 | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/nolocinfo.prof -S -pass-remarks=sample-profile 2>&1 | FileCheck %s + +define i32 @foo(i32 %i) !dbg !4 { +entry: + %i.addr = alloca i32, align 4 + %0 = load i32, i32* %i.addr, align 4 + %cmp = icmp sgt i32 %0, 1000 + +; Remarks for conditional branches need debug location information for the +; referring branch. When that is not present, the compiler should not abort. +; +; CHECK: remark: nolocinfo.c:3:5: most popular destination for conditional branches at <UNKNOWN LOCATION> + br i1 %cmp, label %if.then, label %if.end + +if.then: + ret i32 0, !dbg !18 + +if.end: + ret i32 1 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8, !9} +!llvm.ident = !{!10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251335) (llvm/trunk 251344)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "nolocinfo.c", directory: ".") +!2 = !{} +!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{!7, !7} +!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!8 = !{i32 2, !"Dwarf Version", i32 4} +!9 = !{i32 2, !"Debug Info Version", i32 3} +!10 = !{!"clang version 3.8.0 (trunk 251335) (llvm/trunk 251344)"} +!15 = distinct !DILexicalBlock(scope: !4, file: !1, line: 2, column: 7) +!18 = !DILocation(line: 3, column: 5, scope: !15) diff --git a/llvm/test/Transforms/SampleProfile/offset.ll b/llvm/test/Transforms/SampleProfile/offset.ll new file mode 100644 index 00000000000..145763dc9bc --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/offset.ll @@ -0,0 +1,82 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/offset.prof | opt -analyze -branch-prob | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/offset.prof | opt -analyze -branch-prob | FileCheck %s + +; Original C++ code for this test case: +; +; a.cc: +; #1 +; #2 +; #3 +; #4 +; #5 int foo(int a) { +; #6 #include "a.b" +; #7} +; +; a.b: +; #1 if (a > 0) { +; #2 return 10; +; #3 } else { +; #4 return 20; +; #5 } + +; Function Attrs: nounwind uwtable +define i32 @_Z3fooi(i32 %a) #0 !dbg !4 { +entry: + %retval = alloca i32, align 4 + %a.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !11, metadata !12), !dbg !13 + %0 = load i32, i32* %a.addr, align 4, !dbg !14 + %cmp = icmp sgt i32 %0, 0, !dbg !18 + br i1 %cmp, label %if.then, label %if.else, !dbg !19 +; CHECK: edge entry -> if.then probability is 0x0167ba82 / 0x80000000 = 1.10% +; CHECK: edge entry -> if.else probability is 0x7e98457e / 0x80000000 = 98.90% [HOT edge] + +if.then: ; preds = %entry + store i32 10, i32* %retval, align 4, !dbg !20 + br label %return, !dbg !20 + +if.else: ; preds = %entry + store i32 20, i32* %retval, align 4, !dbg !22 + br label %return, !dbg !22 + +return: ; preds = %if.else, %if.then + %1 = load i32, i32* %retval, align 4, !dbg !24 + ret i32 %1, !dbg !24 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8, !9} +!llvm.ident = !{!10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250750)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "a.cc", directory: "/tmp") +!2 = !{} +!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 5, type: !5, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{!7, !7} +!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!8 = !{i32 2, !"Dwarf Version", i32 4} +!9 = !{i32 2, !"Debug Info Version", i32 3} +!10 = !{!"clang version 3.8.0 (trunk 250750)"} +!11 = !DILocalVariable(name: "a", arg: 1, scope: !4, file: !1, line: 5, type: !7) +!12 = !DIExpression() +!13 = !DILocation(line: 5, column: 13, scope: !4) +!14 = !DILocation(line: 1, column: 5, scope: !15) +!15 = distinct !DILexicalBlock(scope: !17, file: !16, line: 1, column: 5) +!16 = !DIFile(filename: "./a.b", directory: "/tmp") +!17 = !DILexicalBlockFile(scope: !4, file: !16, discriminator: 0) +!18 = !DILocation(line: 1, column: 7, scope: !15) +!19 = !DILocation(line: 1, column: 5, scope: !17) +!20 = !DILocation(line: 2, column: 3, scope: !21) +!21 = distinct !DILexicalBlock(scope: !15, file: !16, line: 1, column: 12) +!22 = !DILocation(line: 4, column: 3, scope: !23) +!23 = distinct !DILexicalBlock(scope: !15, file: !16, line: 3, column: 8) +!24 = !DILocation(line: 7, column: 1, scope: !25) +!25 = !DILexicalBlockFile(scope: !4, file: !1, discriminator: 0) diff --git a/llvm/test/Transforms/SampleProfile/propagate.ll b/llvm/test/Transforms/SampleProfile/propagate.ll new file mode 100644 index 00000000000..303e59a2dbb --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/propagate.ll @@ -0,0 +1,317 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/propagate.prof | opt -analyze -branch-prob | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/propagate.prof | opt -analyze -branch-prob | FileCheck %s + +; Original C++ code for this test case: +; +; #include <stdio.h> +; +; long foo(int x, int y, long N) { +; if (x < y) { +; return y - x; +; } else { +; for (long i = 0; i < N; i++) { +; if (i > N / 3) +; x--; +; if (i > N / 4) { +; y++; +; x += 3; +; } else { +; for (unsigned j = 0; j < 100; j++) { +; x += j; +; y -= 3; +; } +; } +; } +; } +; return y * x; +; } +; +; int main() { +; int x = 5678; +; int y = 1234; +; long N = 9999999; +; printf("foo(%d, %d, %ld) = %ld\n", x, y, N, foo(x, y, N)); +; return 0; +; } + +; ModuleID = 'propagate.cc' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@.str = private unnamed_addr constant [24 x i8] c"foo(%d, %d, %ld) = %ld\0A\00", align 1 + +; Function Attrs: nounwind uwtable +define i64 @_Z3fooiil(i32 %x, i32 %y, i64 %N) #0 !dbg !6 { +entry: + %retval = alloca i64, align 8 + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + %N.addr = alloca i64, align 8 + %i = alloca i64, align 8 + %j = alloca i64, align 8 + store i32 %x, i32* %x.addr, align 4 + call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !11, metadata !12), !dbg !13 + store i32 %y, i32* %y.addr, align 4 + call void @llvm.dbg.declare(metadata i32* %y.addr, metadata !14, metadata !12), !dbg !15 + store i64 %N, i64* %N.addr, align 8 + call void @llvm.dbg.declare(metadata i64* %N.addr, metadata !16, metadata !12), !dbg !17 + %0 = load i32, i32* %x.addr, align 4, !dbg !18 + %1 = load i32, i32* %y.addr, align 4, !dbg !20 + %cmp = icmp slt i32 %0, %1, !dbg !21 + br i1 %cmp, label %if.then, label %if.else, !dbg !22 + +if.then: ; preds = %entry + %2 = load i32, i32* %y.addr, align 4, !dbg !23 + %3 = load i32, i32* %x.addr, align 4, !dbg !25 + %sub = sub nsw i32 %2, %3, !dbg !26 + %conv = sext i32 %sub to i64, !dbg !23 + store i64 %conv, i64* %retval, align 8, !dbg !27 + br label %return, !dbg !27 + +if.else: ; preds = %entry + call void @llvm.dbg.declare(metadata i64* %i, metadata !28, metadata !12), !dbg !31 + store i64 0, i64* %i, align 8, !dbg !31 + br label %for.cond, !dbg !32 + +for.cond: ; preds = %for.inc17, %if.else + %4 = load i64, i64* %i, align 8, !dbg !33 + %5 = load i64, i64* %N.addr, align 8, !dbg !36 + %cmp1 = icmp slt i64 %4, %5, !dbg !37 + br i1 %cmp1, label %for.body, label %for.end19, !dbg !38 + +for.body: ; preds = %for.cond + %6 = load i64, i64* %i, align 8, !dbg !39 + %7 = load i64, i64* %N.addr, align 8, !dbg !42 + %div = sdiv i64 %7, 3, !dbg !43 + %cmp2 = icmp sgt i64 %6, %div, !dbg !44 + br i1 %cmp2, label %if.then3, label %if.end, !dbg !45 +; CHECK: edge for.body -> if.then3 probability is 0x51292fa6 / 0x80000000 = 63.41% +; CHECK: edge for.body -> if.end probability is 0x2ed6d05a / 0x80000000 = 36.59% + +if.then3: ; preds = %for.body + %8 = load i32, i32* %x.addr, align 4, !dbg !46 + %dec = add nsw i32 %8, -1, !dbg !46 + store i32 %dec, i32* %x.addr, align 4, !dbg !46 + br label %if.end, !dbg !47 + +if.end: ; preds = %if.then3, %for.body + %9 = load i64, i64* %i, align 8, !dbg !48 + %10 = load i64, i64* %N.addr, align 8, !dbg !50 + %div4 = sdiv i64 %10, 4, !dbg !51 + %cmp5 = icmp sgt i64 %9, %div4, !dbg !52 + br i1 %cmp5, label %if.then6, label %if.else7, !dbg !53 +; CHECK: edge if.end -> if.then6 probability is 0x5d89d89e / 0x80000000 = 73.08% +; CHECK: edge if.end -> if.else7 probability is 0x22762762 / 0x80000000 = 26.92% + +if.then6: ; preds = %if.end + %11 = load i32, i32* %y.addr, align 4, !dbg !54 + %inc = add nsw i32 %11, 1, !dbg !54 + store i32 %inc, i32* %y.addr, align 4, !dbg !54 + %12 = load i32, i32* %x.addr, align 4, !dbg !56 + %add = add nsw i32 %12, 3, !dbg !56 + store i32 %add, i32* %x.addr, align 4, !dbg !56 + br label %if.end16, !dbg !57 + +if.else7: ; preds = %if.end + call void @llvm.dbg.declare(metadata i64* %j, metadata !58, metadata !12), !dbg !62 + store i64 0, i64* %j, align 8, !dbg !62 + br label %for.cond8, !dbg !63 + +for.cond8: ; preds = %for.inc, %if.else7 + %13 = load i64, i64* %j, align 8, !dbg !64 + %cmp9 = icmp slt i64 %13, 100, !dbg !67 + br i1 %cmp9, label %for.body10, label %for.end, !dbg !68 +; CHECK: edge for.cond8 -> for.body10 probability is 0x7e941a89 / 0x80000000 = 98.89% [HOT edge] +; CHECK: edge for.cond8 -> for.end probability is 0x016be577 / 0x80000000 = 1.11% + + +for.body10: ; preds = %for.cond8 + %14 = load i64, i64* %j, align 8, !dbg !69 + %15 = load i32, i32* %x.addr, align 4, !dbg !71 + %conv11 = sext i32 %15 to i64, !dbg !71 + %add12 = add nsw i64 %conv11, %14, !dbg !71 + %conv13 = trunc i64 %add12 to i32, !dbg !71 + store i32 %conv13, i32* %x.addr, align 4, !dbg !71 + %16 = load i32, i32* %y.addr, align 4, !dbg !72 + %sub14 = sub nsw i32 %16, 3, !dbg !72 + store i32 %sub14, i32* %y.addr, align 4, !dbg !72 + br label %for.inc, !dbg !73 + +for.inc: ; preds = %for.body10 + %17 = load i64, i64* %j, align 8, !dbg !74 + %inc15 = add nsw i64 %17, 1, !dbg !74 + store i64 %inc15, i64* %j, align 8, !dbg !74 + br label %for.cond8, !dbg !76 + +for.end: ; preds = %for.cond8 + br label %if.end16 + +if.end16: ; preds = %for.end, %if.then6 + br label %for.inc17, !dbg !77 + +for.inc17: ; preds = %if.end16 + %18 = load i64, i64* %i, align 8, !dbg !78 + %inc18 = add nsw i64 %18, 1, !dbg !78 + store i64 %inc18, i64* %i, align 8, !dbg !78 + br label %for.cond, !dbg !80 + +for.end19: ; preds = %for.cond + br label %if.end20 + +if.end20: ; preds = %for.end19 + %19 = load i32, i32* %y.addr, align 4, !dbg !81 + %20 = load i32, i32* %x.addr, align 4, !dbg !82 + %mul = mul nsw i32 %19, %20, !dbg !83 + %conv21 = sext i32 %mul to i64, !dbg !81 + store i64 %conv21, i64* %retval, align 8, !dbg !84 + br label %return, !dbg !84 + +return: ; preds = %if.end20, %if.then + %21 = load i64, i64* %retval, align 8, !dbg !85 + ret i64 %21, !dbg !85 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: norecurse uwtable +define i32 @main() #2 !dbg !86 { +entry: + %retval = alloca i32, align 4 + %x = alloca i32, align 4 + %y = alloca i32, align 4 + %N = alloca i64, align 8 + store i32 0, i32* %retval, align 4 + call void @llvm.dbg.declare(metadata i32* %x, metadata !89, metadata !12), !dbg !90 + store i32 5678, i32* %x, align 4, !dbg !90 + call void @llvm.dbg.declare(metadata i32* %y, metadata !91, metadata !12), !dbg !92 + store i32 1234, i32* %y, align 4, !dbg !92 + call void @llvm.dbg.declare(metadata i64* %N, metadata !93, metadata !12), !dbg !94 + store i64 9999999, i64* %N, align 8, !dbg !94 + %0 = load i32, i32* %x, align 4, !dbg !95 + %1 = load i32, i32* %y, align 4, !dbg !96 + %2 = load i64, i64* %N, align 8, !dbg !97 + %3 = load i32, i32* %x, align 4, !dbg !98 + %4 = load i32, i32* %y, align 4, !dbg !99 + %5 = load i64, i64* %N, align 8, !dbg !100 + %call = call i64 @_Z3fooiil(i32 %3, i32 %4, i64 %5), !dbg !101 + %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str, i32 0, i32 0), i32 %0, i32 %1, i64 %2, i64 %call), !dbg !102 + ret i32 0, !dbg !104 +} + +declare i32 @printf(i8*, ...) #3 + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { norecurse uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.9.0 (trunk 266819)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "propagate.cc", directory: ".") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{!"clang version 3.9.0 (trunk 266819)"} +!6 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooiil", scope: !1, file: !1, line: 3, type: !7, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) +!7 = !DISubroutineType(types: !8) +!8 = !{!9, !10, !10, !9} +!9 = !DIBasicType(name: "long int", size: 64, align: 64, encoding: DW_ATE_signed) +!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!11 = !DILocalVariable(name: "x", arg: 1, scope: !6, file: !1, line: 3, type: !10) +!12 = !DIExpression() +!13 = !DILocation(line: 3, column: 14, scope: !6) +!14 = !DILocalVariable(name: "y", arg: 2, scope: !6, file: !1, line: 3, type: !10) +!15 = !DILocation(line: 3, column: 21, scope: !6) +!16 = !DILocalVariable(name: "N", arg: 3, scope: !6, file: !1, line: 3, type: !9) +!17 = !DILocation(line: 3, column: 29, scope: !6) +!18 = !DILocation(line: 4, column: 7, scope: !19) +!19 = distinct !DILexicalBlock(scope: !6, file: !1, line: 4, column: 7) +!20 = !DILocation(line: 4, column: 11, scope: !19) +!21 = !DILocation(line: 4, column: 9, scope: !19) +!22 = !DILocation(line: 4, column: 7, scope: !6) +!23 = !DILocation(line: 5, column: 12, scope: !24) +!24 = distinct !DILexicalBlock(scope: !19, file: !1, line: 4, column: 14) +!25 = !DILocation(line: 5, column: 16, scope: !24) +!26 = !DILocation(line: 5, column: 14, scope: !24) +!27 = !DILocation(line: 5, column: 5, scope: !24) +!28 = !DILocalVariable(name: "i", scope: !29, file: !1, line: 7, type: !9) +!29 = distinct !DILexicalBlock(scope: !30, file: !1, line: 7, column: 5) +!30 = distinct !DILexicalBlock(scope: !19, file: !1, line: 6, column: 10) +!31 = !DILocation(line: 7, column: 15, scope: !29) +!32 = !DILocation(line: 7, column: 10, scope: !29) +!33 = !DILocation(line: 7, column: 22, scope: !34) +!34 = !DILexicalBlockFile(scope: !35, file: !1, discriminator: 2) +!35 = distinct !DILexicalBlock(scope: !29, file: !1, line: 7, column: 5) +!36 = !DILocation(line: 7, column: 26, scope: !34) +!37 = !DILocation(line: 7, column: 24, scope: !34) +!38 = !DILocation(line: 7, column: 5, scope: !34) +!39 = !DILocation(line: 8, column: 11, scope: !40) +!40 = distinct !DILexicalBlock(scope: !41, file: !1, line: 8, column: 11) +!41 = distinct !DILexicalBlock(scope: !35, file: !1, line: 7, column: 34) +!42 = !DILocation(line: 8, column: 15, scope: !40) +!43 = !DILocation(line: 8, column: 17, scope: !40) +!44 = !DILocation(line: 8, column: 13, scope: !40) +!45 = !DILocation(line: 8, column: 11, scope: !41) +!46 = !DILocation(line: 9, column: 10, scope: !40) +!47 = !DILocation(line: 9, column: 9, scope: !40) +!48 = !DILocation(line: 10, column: 11, scope: !49) +!49 = distinct !DILexicalBlock(scope: !41, file: !1, line: 10, column: 11) +!50 = !DILocation(line: 10, column: 15, scope: !49) +!51 = !DILocation(line: 10, column: 17, scope: !49) +!52 = !DILocation(line: 10, column: 13, scope: !49) +!53 = !DILocation(line: 10, column: 11, scope: !41) +!54 = !DILocation(line: 11, column: 10, scope: !55) +!55 = distinct !DILexicalBlock(scope: !49, file: !1, line: 10, column: 22) +!56 = !DILocation(line: 12, column: 11, scope: !55) +!57 = !DILocation(line: 13, column: 7, scope: !55) +!58 = !DILocalVariable(name: "j", scope: !59, file: !1, line: 14, type: !61) +!59 = distinct !DILexicalBlock(scope: !60, file: !1, line: 14, column: 9) +!60 = distinct !DILexicalBlock(scope: !49, file: !1, line: 13, column: 14) +!61 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed) +!62 = !DILocation(line: 14, column: 24, scope: !59) +!63 = !DILocation(line: 14, column: 14, scope: !59) +!64 = !DILocation(line: 14, column: 31, scope: !65) +!65 = !DILexicalBlockFile(scope: !66, file: !1, discriminator: 2) +!66 = distinct !DILexicalBlock(scope: !59, file: !1, line: 14, column: 9) +!67 = !DILocation(line: 14, column: 33, scope: !65) +!68 = !DILocation(line: 14, column: 9, scope: !65) +!69 = !DILocation(line: 15, column: 16, scope: !70) +!70 = distinct !DILexicalBlock(scope: !66, file: !1, line: 14, column: 45) +!71 = !DILocation(line: 15, column: 13, scope: !70) +!72 = !DILocation(line: 16, column: 13, scope: !70) +!73 = !DILocation(line: 17, column: 9, scope: !70) +!74 = !DILocation(line: 14, column: 41, scope: !75) +!75 = !DILexicalBlockFile(scope: !66, file: !1, discriminator: 4) +!76 = !DILocation(line: 14, column: 9, scope: !75) +!77 = !DILocation(line: 19, column: 5, scope: !41) +!78 = !DILocation(line: 7, column: 30, scope: !79) +!79 = !DILexicalBlockFile(scope: !35, file: !1, discriminator: 4) +!80 = !DILocation(line: 7, column: 5, scope: !79) +!81 = !DILocation(line: 21, column: 10, scope: !6) +!82 = !DILocation(line: 21, column: 14, scope: !6) +!83 = !DILocation(line: 21, column: 12, scope: !6) +!84 = !DILocation(line: 21, column: 3, scope: !6) +!85 = !DILocation(line: 22, column: 1, scope: !6) +!86 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 24, type: !87, isLocal: false, isDefinition: true, scopeLine: 24, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) +!87 = !DISubroutineType(types: !88) +!88 = !{!10} +!89 = !DILocalVariable(name: "x", scope: !86, file: !1, line: 25, type: !10) +!90 = !DILocation(line: 25, column: 7, scope: !86) +!91 = !DILocalVariable(name: "y", scope: !86, file: !1, line: 26, type: !10) +!92 = !DILocation(line: 26, column: 7, scope: !86) +!93 = !DILocalVariable(name: "N", scope: !86, file: !1, line: 27, type: !9) +!94 = !DILocation(line: 27, column: 8, scope: !86) +!95 = !DILocation(line: 28, column: 38, scope: !86) +!96 = !DILocation(line: 28, column: 41, scope: !86) +!97 = !DILocation(line: 28, column: 44, scope: !86) +!98 = !DILocation(line: 28, column: 51, scope: !86) +!99 = !DILocation(line: 28, column: 54, scope: !86) +!100 = !DILocation(line: 28, column: 57, scope: !86) +!101 = !DILocation(line: 28, column: 47, scope: !86) +!102 = !DILocation(line: 28, column: 3, scope: !103) +!103 = !DILexicalBlockFile(scope: !86, file: !1, discriminator: 2) +!104 = !DILocation(line: 29, column: 3, scope: !86) diff --git a/llvm/test/Transforms/SampleProfile/remap.ll b/llvm/test/Transforms/SampleProfile/remap.ll new file mode 100644 index 00000000000..206962a3bef --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/remap.ll @@ -0,0 +1,60 @@ +; RUN: opt %s -passes=sample-profile -sample-profile-file=%S/Inputs/remap.prof -sample-profile-remapping-file=%S/Inputs/remap.map | opt -analyze -branch-prob | FileCheck %s + +; Reduced from branch.ll + +declare i1 @foo() + +define void @_ZN3foo3barERKN1M1XINS_6detail3quxEEE() !dbg !2 { +; CHECK: Printing analysis 'Branch Probability Analysis' for function '_ZN3foo3barERKN1M1XINS_6detail3quxEEE': + +entry: + %cmp = call i1 @foo(), !dbg !6 + br i1 %cmp, label %if.then, label %if.end +; CHECK: edge entry -> if.then probability is 0x4ccf6b16 / 0x80000000 = 60.01% +; CHECK: edge entry -> if.end probability is 0x333094ea / 0x80000000 = 39.99% + +if.then: + br label %return + +if.end: + %cmp1 = call i1 @foo(), !dbg !7 + br i1 %cmp1, label %if.then.2, label %if.else +; CHECK: edge if.end -> if.then.2 probability is 0x6652c748 / 0x80000000 = 79.94% +; CHECK: edge if.end -> if.else probability is 0x19ad38b8 / 0x80000000 = 20.06% + +if.then.2: + call i1 @foo(), !dbg !8 + br label %for.cond + +for.cond: + %cmp5 = call i1 @foo() + br i1 %cmp5, label %for.body, label %for.end, !prof !9 +; CHECK: edge for.cond -> for.body probability is 0x73333333 / 0x80000000 = 90.00% +; CHECK: edge for.cond -> for.end probability is 0x0ccccccd / 0x80000000 = 10.00% + +for.body: + br label %for.cond + +for.end: + br label %return + +if.else: + br label %return + +return: + ret void +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!4, !5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "foo++", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !{}, retainedTypes: !{}) +!1 = !DIFile(filename: "test.cc", directory: "/foo/bar") +!2 = distinct !DISubprogram(name: "_ZN3foo3barERKN1M1XINS_6detail3quxEEE", scope: !1, file: !1, line: 4, type: !3, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !{}) +!3 = !DISubroutineType(types: !{}) +!4 = !{i32 2, !"Dwarf Version", i32 4} +!5 = !{i32 2, !"Debug Info Version", i32 3} +!6 = !DILocation(line: 5, column: 8, scope: !2) +!7 = !DILocation(line: 8, column: 6, scope: !2) +!8 = !DILocation(line: 10, column: 11, scope: !2) +!9 = !{!"branch_weights", i32 90, i32 10} diff --git a/llvm/test/Transforms/SampleProfile/remarks.ll b/llvm/test/Transforms/SampleProfile/remarks.ll new file mode 100644 index 00000000000..3ecaa534296 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/remarks.ll @@ -0,0 +1,225 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/remarks.prof -S -pass-remarks=sample-profile -pass-remarks-output=%t.opt.yaml 2>&1 | FileCheck %s +; RUN: FileCheck %s -check-prefix=YAML < %t.opt.yaml +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/remarks.prof -S -pass-remarks=sample-profile -pass-remarks-output=%t.opt.yaml 2>&1 | FileCheck %s +; RUN: FileCheck %s -check-prefix=YAML < %t.opt.yaml + +; Original test case. +; +; 1 #include <stdlib.h> +; 2 +; 3 long long foo() { +; 4 long long int sum = 0; +; 5 for (int i = 0; i < 500000000; i++) +; 6 if (i < 1000) +; 7 sum -= i; +; 8 else +; 9 sum += -i * rand(); +; 10 return sum; +; 11 } +; 12 +; 13 int main() { return foo() > 0; } + +; We are expecting foo() to be inlined in main() (almost all the cycles are +; spent inside foo). +; CHECK: remark: remarks.cc:13:21: inlined hot callee '_Z3foov' into 'main' + +; The back edge for the loop is the hottest edge in the loop subgraph. +; CHECK: remark: remarks.cc:6:9: most popular destination for conditional branches at remarks.cc:5:3 + +; The predicate almost always chooses the 'else' branch. +; CHECK: remark: remarks.cc:9:15: most popular destination for conditional branches at remarks.cc:6:9 + +; Checking to see if YAML file is generated and contains remarks +;YAML: --- !Passed +;YAML-NEXT: Pass: sample-profile +;YAML-NEXT: Name: HotInline +;YAML-NEXT: DebugLoc: { File: remarks.cc, Line: 13, Column: 21 } +;YAML-NEXT: Function: main +;YAML-NEXT: Args: +;YAML-NEXT: - String: 'inlined hot callee ''' +;YAML-NEXT: - Callee: _Z3foov +;YAML-NEXT: DebugLoc: { File: remarks.cc, Line: 3, Column: 0 } +;YAML-NEXT: - String: ''' into ''' +;YAML-NEXT: - Caller: main +;YAML-NEXT: DebugLoc: { File: remarks.cc, Line: 13, Column: 0 } +;YAML-NEXT: - String: '''' +;YAML-NEXT: ... +;YAML: --- !Analysis +;YAML-NEXT: Pass: sample-profile +;YAML-NEXT: Name: AppliedSamples +;YAML-NEXT: DebugLoc: { File: remarks.cc, Line: 5, Column: 8 } +;YAML-NEXT: Function: main +;YAML-NEXT: Args: +;YAML-NEXT: - String: 'Applied ' +;YAML-NEXT: - NumSamples: '18305' +;YAML-NEXT: - String: ' samples from profile (offset: ' +;YAML-NEXT: - LineOffset: '2' +;YAML-NEXT: - String: ')' +;YAML-NEXT: ... +;YAML: --- !Passed +;YAML-NEXT: Pass: sample-profile +;YAML-NEXT: Name: PopularDest +;YAML-NEXT: DebugLoc: { File: remarks.cc, Line: 6, Column: 9 } +;YAML-NEXT: Function: main +;YAML-NEXT: Args: +;YAML-NEXT: - String: 'most popular destination for conditional branches at ' +;YAML-NEXT: - CondBranchesLoc: 'remarks.cc:5:3' +;YAML-NEXT: DebugLoc: { File: remarks.cc, Line: 5, Column: 3 } +;YAML-NEXT: ... + +; Function Attrs: nounwind uwtable +define i64 @_Z3foov() #0 !dbg !4 { +entry: + %sum = alloca i64, align 8 + %i = alloca i32, align 4 + %0 = bitcast i64* %sum to i8*, !dbg !19 + call void @llvm.lifetime.start.p0i8(i64 8, i8* %0) #4, !dbg !19 + call void @llvm.dbg.declare(metadata i64* %sum, metadata !9, metadata !20), !dbg !21 + store i64 0, i64* %sum, align 8, !dbg !21, !tbaa !22 + %1 = bitcast i32* %i to i8*, !dbg !26 + call void @llvm.lifetime.start.p0i8(i64 4, i8* %1) #4, !dbg !26 + call void @llvm.dbg.declare(metadata i32* %i, metadata !10, metadata !20), !dbg !27 + store i32 0, i32* %i, align 4, !dbg !27, !tbaa !28 + br label %for.cond, !dbg !26 + +for.cond: ; preds = %for.inc, %entry + %2 = load i32, i32* %i, align 4, !dbg !30, !tbaa !28 + %cmp = icmp slt i32 %2, 500000000, !dbg !34 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !35 + +for.cond.cleanup: ; preds = %for.cond + %3 = bitcast i32* %i to i8*, !dbg !36 + call void @llvm.lifetime.end.p0i8(i64 4, i8* %3) #4, !dbg !36 + br label %for.end + +for.body: ; preds = %for.cond + %4 = load i32, i32* %i, align 4, !dbg !38, !tbaa !28 + %cmp1 = icmp slt i32 %4, 1000, !dbg !40 + br i1 %cmp1, label %if.then, label %if.else, !dbg !41 + +if.then: ; preds = %for.body + %5 = load i32, i32* %i, align 4, !dbg !42, !tbaa !28 + %conv = sext i32 %5 to i64, !dbg !42 + %6 = load i64, i64* %sum, align 8, !dbg !43, !tbaa !22 + %sub = sub nsw i64 %6, %conv, !dbg !43 + store i64 %sub, i64* %sum, align 8, !dbg !43, !tbaa !22 + br label %if.end, !dbg !44 + +if.else: ; preds = %for.body + %7 = load i32, i32* %i, align 4, !dbg !45, !tbaa !28 + %sub2 = sub nsw i32 0, %7, !dbg !46 + %call = call i32 @rand() #4, !dbg !47 + %mul = mul nsw i32 %sub2, %call, !dbg !48 + %conv3 = sext i32 %mul to i64, !dbg !46 + %8 = load i64, i64* %sum, align 8, !dbg !49, !tbaa !22 + %add = add nsw i64 %8, %conv3, !dbg !49 + store i64 %add, i64* %sum, align 8, !dbg !49, !tbaa !22 + br label %if.end + +if.end: ; preds = %if.else, %if.then + br label %for.inc, !dbg !50 + +for.inc: ; preds = %if.end + %9 = load i32, i32* %i, align 4, !dbg !51, !tbaa !28 + %inc = add nsw i32 %9, 1, !dbg !51 + store i32 %inc, i32* %i, align 4, !dbg !51, !tbaa !28 + br label %for.cond, !dbg !52 + +for.end: ; preds = %for.cond.cleanup + %10 = load i64, i64* %sum, align 8, !dbg !53, !tbaa !22 + %11 = bitcast i64* %sum to i8*, !dbg !54 + call void @llvm.lifetime.end.p0i8(i64 8, i8* %11) #4, !dbg !54 + ret i64 %10, !dbg !55 +} + +; Function Attrs: nounwind argmemonly +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1 + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #2 + +; Function Attrs: nounwind +declare i32 @rand() #3 + +; Function Attrs: nounwind argmemonly +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1 + +; Function Attrs: nounwind uwtable +define i32 @main() #0 !dbg !13 { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval, align 4 + %call = call i64 @_Z3foov(), !dbg !56 + %cmp = icmp sgt i64 %call, 0, !dbg !57 + %conv = zext i1 %cmp to i32, !dbg !56 + ret i32 %conv, !dbg !58 +} + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind argmemonly } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!16, !17} +!llvm.ident = !{!18} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 251041) (llvm/trunk 251053)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "remarks.cc", directory: ".") +!2 = !{} +!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !8) +!5 = !DISubroutineType(types: !6) +!6 = !{!7} +!7 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed) +!8 = !{!9, !10} +!9 = !DILocalVariable(name: "sum", scope: !4, file: !1, line: 4, type: !7) +!10 = !DILocalVariable(name: "i", scope: !11, file: !1, line: 5, type: !12) +!11 = distinct !DILexicalBlock(scope: !4, file: !1, line: 5, column: 3) +!12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!13 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 13, type: !14, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2) +!14 = !DISubroutineType(types: !15) +!15 = !{!12} +!16 = !{i32 2, !"Dwarf Version", i32 4} +!17 = !{i32 2, !"Debug Info Version", i32 3} +!18 = !{!"clang version 3.8.0 (trunk 251041) (llvm/trunk 251053)"} +!19 = !DILocation(line: 4, column: 3, scope: !4) +!20 = !DIExpression() +!21 = !DILocation(line: 4, column: 17, scope: !4) +!22 = !{!23, !23, i64 0} +!23 = !{!"long long", !24, i64 0} +!24 = !{!"omnipotent char", !25, i64 0} +!25 = !{!"Simple C/C++ TBAA"} +!26 = !DILocation(line: 5, column: 8, scope: !11) +!27 = !DILocation(line: 5, column: 12, scope: !11) +!28 = !{!29, !29, i64 0} +!29 = !{!"int", !24, i64 0} +!30 = !DILocation(line: 5, column: 19, scope: !31) +!31 = !DILexicalBlockFile(scope: !32, file: !1, discriminator: 3) +!32 = !DILexicalBlockFile(scope: !33, file: !1, discriminator: 1) +!33 = distinct !DILexicalBlock(scope: !11, file: !1, line: 5, column: 3) +!34 = !DILocation(line: 5, column: 21, scope: !33) +!35 = !DILocation(line: 5, column: 3, scope: !11) +!36 = !DILocation(line: 5, column: 3, scope: !37) +!37 = !DILexicalBlockFile(scope: !33, file: !1, discriminator: 2) +!38 = !DILocation(line: 6, column: 9, scope: !39) +!39 = distinct !DILexicalBlock(scope: !33, file: !1, line: 6, column: 9) +!40 = !DILocation(line: 6, column: 11, scope: !39) +!41 = !DILocation(line: 6, column: 9, scope: !33) +!42 = !DILocation(line: 7, column: 14, scope: !39) +!43 = !DILocation(line: 7, column: 11, scope: !39) +!44 = !DILocation(line: 7, column: 7, scope: !39) +!45 = !DILocation(line: 9, column: 15, scope: !39) +!46 = !DILocation(line: 9, column: 14, scope: !39) +!47 = !DILocation(line: 9, column: 19, scope: !39) +!48 = !DILocation(line: 9, column: 17, scope: !39) +!49 = !DILocation(line: 9, column: 11, scope: !39) +!50 = !DILocation(line: 6, column: 13, scope: !39) +!51 = !DILocation(line: 5, column: 35, scope: !33) +!52 = !DILocation(line: 5, column: 3, scope: !33) +!53 = !DILocation(line: 10, column: 10, scope: !4) +!54 = !DILocation(line: 11, column: 1, scope: !4) +!55 = !DILocation(line: 10, column: 3, scope: !4) +!56 = !DILocation(line: 13, column: 21, scope: !13) +!57 = !DILocation(line: 13, column: 27, scope: !13) +!58 = !DILocation(line: 13, column: 14, scope: !13) diff --git a/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll b/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll new file mode 100644 index 00000000000..3a985ff7b5e --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll @@ -0,0 +1,52 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -S | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -profile-sample-accurate -S | FileCheck %s --check-prefix ACCURATE + +target triple = "x86_64-pc-linux-gnu" + +; The test checks that function without profile gets unlikely section prefix +; if -profile-sample-accurate is specified or the function has the +; profile-sample-accurate attribute. + +declare void @hot_func() + +; CHECK-NOT: foo_not_in_profile{{.*}}!section_prefix +; CHECK: foo_not_in_profile{{.*}}!prof ![[UNKNOWN_ID:[0-9]+]] +; ACCURATE: foo_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]] +; The function not appearing in profile is cold when -profile-sample-accurate +; is on. +define void @foo_not_in_profile() { + call void @hot_func() + ret void +} + +; CHECK: bar_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]] +; ACCURATE: bar_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]] +; The function not appearing in profile is cold when the func has +; profile-sample-accurate attribute. +define void @bar_not_in_profile() #0 { + call void @hot_func() + ret void +} + +attributes #0 = { "profile-sample-accurate" } + +; CHECK: ![[UNKNOWN_ID]] = !{!"function_entry_count", i64 -1} +; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0} +; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"} +; ACCURATE: ![[ZERO_ID]] = !{!"function_entry_count", i64 0} +; ACCURATE: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"} +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"SampleProfile"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} diff --git a/llvm/test/Transforms/SampleProfile/summary.ll b/llvm/test/Transforms/SampleProfile/summary.ll new file mode 100644 index 00000000000..03b6644eacc --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/summary.ll @@ -0,0 +1,26 @@ +; Test that we annotate entire program's summary to IR. +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/summary.prof -S | FileCheck %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/summary.prof -S | opt -sample-profile -sample-profile-file=%S/Inputs/summary.prof -S | FileCheck %s + +define i32 @bar() #0 !dbg !1 { +entry: + ret i32 1, !dbg !2 +} + +define i32 @baz() !dbg !3 { +entry: + %call = call i32 @bar(), !dbg !4 + ret i32 %call, !dbg !5 +} + +; CHECK-DAG: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}} +; CHECK-DAG: {{![0-9]+}} = !{!"TotalCount", i64 900} +; CHECK-DAG: {{![0-9]+}} = !{!"NumCounts", i64 5} +; CHECK-DAG: {{![0-9]+}} = !{!"NumFunctions", i64 3} +; CHECK-DAG: {{![0-9]+}} = !{!"MaxFunctionCount", i64 3} + +!1 = distinct !DISubprogram(name: "bar") +!2 = !DILocation(line: 2, scope: !2) +!3 = distinct !DISubprogram(name: "baz") +!4 = !DILocation(line: 1, scope: !4) +!5 = !DILocation(line: 2, scope: !5) diff --git a/llvm/test/Transforms/SampleProfile/syntax.ll b/llvm/test/Transforms/SampleProfile/syntax.ll new file mode 100644 index 00000000000..7114dfa6157 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/syntax.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/syntax.prof 2>&1 | FileCheck -check-prefix=NO-DEBUG %s +; RUN: not opt < %s -sample-profile -sample-profile-file=missing.prof 2>&1 | FileCheck -check-prefix=MISSING-FILE %s +; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_fn_header.prof 2>&1 | FileCheck -check-prefix=BAD-FN-HEADER %s +; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_sample_line.prof 2>&1 | FileCheck -check-prefix=BAD-SAMPLE-LINE %s +; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_line_values.prof 2>&1 | FileCheck -check-prefix=BAD-LINE-VALUES %s +; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_discriminator_value.prof 2>&1 | FileCheck -check-prefix=BAD-DISCRIMINATOR-VALUE %s +; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_samples.prof 2>&1 | FileCheck -check-prefix=BAD-SAMPLES %s +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_mangle.prof 2>&1 >/dev/null + +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/syntax.prof 2>&1 | FileCheck -check-prefix=NO-DEBUG %s +; RUN: not opt < %s -passes=sample-profile -sample-profile-file=missing.prof 2>&1 | FileCheck -check-prefix=MISSING-FILE %s +; RUN: not opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_fn_header.prof 2>&1 | FileCheck -check-prefix=BAD-FN-HEADER %s +; RUN: not opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_sample_line.prof 2>&1 | FileCheck -check-prefix=BAD-SAMPLE-LINE %s +; RUN: not opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_line_values.prof 2>&1 | FileCheck -check-prefix=BAD-LINE-VALUES %s +; RUN: not opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_discriminator_value.prof 2>&1 | FileCheck -check-prefix=BAD-DISCRIMINATOR-VALUE %s +; RUN: not opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_samples.prof 2>&1 | FileCheck -check-prefix=BAD-SAMPLES %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_mangle.prof 2>&1 >/dev/null + +define void @empty() { +entry: + ret void +} +; NO-DEBUG: warning: No debug information found in function empty: Function profile not used +; MISSING-FILE: missing.prof: Could not open profile: +; BAD-FN-HEADER: error: {{.*}}bad_fn_header.prof: Could not open profile: Unrecognized sample profile encoding format +; BAD-SAMPLE-LINE: error: {{.*}}bad_sample_line.prof:3: Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found 1: BAD +; BAD-LINE-VALUES: error: {{.*}}bad_line_values.prof:2: Expected 'mangled_name:NUM:NUM', found -1: 10 +; BAD-DISCRIMINATOR-VALUE: error: {{.*}}bad_discriminator_value.prof:2: Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found 1.-3: 10 +; BAD-SAMPLES: error: {{.*}}bad_samples.prof:2: Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found 1.3: -10 diff --git a/llvm/test/Transforms/SampleProfile/warm-inline-instance.ll b/llvm/test/Transforms/SampleProfile/warm-inline-instance.ll new file mode 100644 index 00000000000..622db49b707 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/warm-inline-instance.ll @@ -0,0 +1,115 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/warm-inline-instance.prof -S | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/warm-inline-instance.prof -S | FileCheck %s + +@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 + +; Function Attrs: nounwind uwtable +define i32 @foo(i32 %x, i32 %y) !dbg !4 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %t0 = load i32, i32* %x.addr, align 4, !dbg !11 + %t1 = load i32, i32* %y.addr, align 4, !dbg !11 + %add = add nsw i32 %t0, %t1, !dbg !11 + ret i32 %add, !dbg !11 +} + +define i32 @goo(i32 %x, i32 %y) { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %t0 = load i32, i32* %x.addr, align 4, !dbg !11 + %t1 = load i32, i32* %y.addr, align 4, !dbg !11 + %add = add nsw i32 %t0, %t1, !dbg !11 + ret i32 %add, !dbg !11 +} + +; Function Attrs: uwtable +define i32 @main() !dbg !7 { +entry: + %retval = alloca i32, align 4 + %s = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %i, align 4, !dbg !12 + br label %while.cond, !dbg !13 + +while.cond: ; preds = %if.end, %entry + %t0 = load i32, i32* %i, align 4, !dbg !14 + %inc = add nsw i32 %t0, 1, !dbg !14 + store i32 %inc, i32* %i, align 4, !dbg !14 + %cmp = icmp slt i32 %t0, 400000000, !dbg !14 + br i1 %cmp, label %while.body, label %while.end, !dbg !14 + +while.body: ; preds = %while.cond + %t1 = load i32, i32* %i, align 4, !dbg !16 + %cmp1 = icmp ne i32 %t1, 100, !dbg !16 + br i1 %cmp1, label %if.then, label %if.else, !dbg !16 + +if.then: ; preds = %while.body + %t2 = load i32, i32* %i, align 4, !dbg !18 + %t3 = load i32, i32* %s, align 4, !dbg !18 +; Although the ratio of total samples of @foo vs total samples of @main is +; small, since the total samples count is larger than hot cutoff computed by +; ProfileSummaryInfo, we will still regard the callsite of foo as hot and +; early inlining will inline it. +; CHECK-LABEL: @main( +; CHECK-NOT: call i32 @foo(i32 %t2, i32 %t3) + %call1 = call i32 @foo(i32 %t2, i32 %t3), !dbg !18 + store i32 %call1, i32* %s, align 4, !dbg !18 + br label %if.end, !dbg !18 + +if.else: ; preds = %while.body +; call @goo 's basicblock doesn't get any sample, so no profile will be annotated. +; CHECK: call i32 @goo(i32 2, i32 3), !dbg !{{[0-9]+}} +; CHECK-NOT: !prof +; CHECK-SAME: {{$}} + %call2 = call i32 @goo(i32 2, i32 3), !dbg !26 + store i32 %call2, i32* %s, align 4, !dbg !20 + br label %if.end + +if.end: ; preds = %if.else, %if.then + br label %while.cond, !dbg !22 + +while.end: ; preds = %while.cond + %t4 = load i32, i32* %s, align 4, !dbg !24 + %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %t4), !dbg !24 + ret i32 0, !dbg !25 +} + +declare i32 @printf(i8*, ...) #2 + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8, !9} +!llvm.ident = !{!10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "calls.cc", directory: ".") +!2 = !{} +!4 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2) +!5 = !DIFile(filename: "calls.cc", directory: ".") +!6 = !DISubroutineType(types: !2) +!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2) +!8 = !{i32 2, !"Dwarf Version", i32 4} +!9 = !{i32 1, !"Debug Info Version", i32 3} +!10 = !{!"clang version 3.5 "} +!11 = !DILocation(line: 4, scope: !4) +!12 = !DILocation(line: 8, scope: !7) +!13 = !DILocation(line: 9, scope: !7) +!14 = !DILocation(line: 9, scope: !15) +!15 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !7) +!16 = !DILocation(line: 10, scope: !17) +!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7) +!18 = !DILocation(line: 10, scope: !19) +!19 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17) +!20 = !DILocation(line: 10, scope: !21) +!21 = !DILexicalBlockFile(discriminator: 4, file: !1, scope: !17) +!22 = !DILocation(line: 10, scope: !23) +!23 = !DILexicalBlockFile(discriminator: 6, file: !1, scope: !17) +!24 = !DILocation(line: 11, scope: !7) +!25 = !DILocation(line: 12, scope: !7) +!26 = !DILocation(line: 11, scope: !19) |