Add CalledValuePropagation pass

This patch adds a new pass for attaching !callees metadata to indirect call sites. The pass propagates values to call sites by performing an IPSCCP-like analysis using the generic sparse propagation solver. For indirect call sites having a small set of possible callees, the attached metadata indicates what those callees are. The metadata can be used to facilitate optimizations like intersecting the function attributes of the possible callees, refining the call graph, performing indirect call promotion, etc. Differential Revision: https://reviews.llvm.org/D37355 llvm-svn: 316576
author: Matthew Simpson <mssimpso@codeaurora.org> 2017-10-25 13:40:08 +0000
committer: Matthew Simpson <mssimpso@codeaurora.org> 2017-10-25 13:40:08 +0000
commit: cb58558c2f680199ae7d9085c5f58d5e127e6f57 (patch)
tree: 460a2ada335c943a548094a25f65209bbaec4dd8 /llvm/test
parent: 7af3edc4f4f19f0b216378b8e8c8677ce8dbafa2 (diff)
download: bcm5719-llvm-cb58558c2f680199ae7d9085c5f58d5e127e6f57.tar.gz
bcm5719-llvm-cb58558c2f680199ae7d9085c5f58d5e127e6f57.zip
6 files changed, 187 insertions, 0 deletions
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 6e5c679e3d9..816f75310e3 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -78,6 +78,7 @@
 ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass
 ; CHECK-O-NEXT: Finished llvm::Function pass manager run.
 ; CHECK-O-NEXT: Running pass: IPSCCPPass
+; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
 ; CHECK-O-NEXT: Running pass: GlobalOptPass
 ; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass>
 ; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
index e450a8eeb3b..fc52f70ff4c 100644
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -34,6 +34,7 @@
 ; CHECK-O2-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Function
 ; CHECK-O2-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
 ; CHECK-O2-NEXT: Running pass: IPSCCPPass
+; CHECK-O2-NEXT: Running pass: CalledValuePropagationPass
 ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PostOrderFunctionAttrsPass>
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
 ; CHECK-O1-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Function
diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll
index 47074adbcb2..7d40ef3eea2 100644
--- a/llvm/test/Other/new-pm-thinlto-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -74,6 +74,7 @@
 ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass
 ; CHECK-O-NEXT: Finished llvm::Function pass manager run.
 ; CHECK-O-NEXT: Running pass: IPSCCPPass
+; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
 ; CHECK-O-NEXT: Running pass: GlobalOptPass
 ; CHECK-O-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass>
 ; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass
diff --git a/llvm/test/Transforms/CalledValuePropagation/simple-arguments.ll b/llvm/test/Transforms/CalledValuePropagation/simple-arguments.ll
new file mode 100644
index 00000000000..34274f3b348
--- /dev/null
+++ b/llvm/test/Transforms/CalledValuePropagation/simple-arguments.ll
@@ -0,0 +1,83 @@
+; RUN: opt -called-value-propagation -S < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnueabi"
+
+
+; This test checks that we propagate the functions through arguments and attach
+; !callees metadata to the call. Such metadata can enable optimizations of this
+; code sequence.
+;
+; For example, the code below illustrates a contrived sort-like algorithm
+; that accepts a pointer to a comparison function. Since the indirect call to
+; the comparison function has only two targets, the call can be promoted to two
+; direct calls using an if-then-else. The loop can then be unswitched and the
+; called functions inlined. This essentially produces two loops, one
+; specialized for each comparison.
+;
+; CHECK:  %tmp3 = call i1 %cmp(i64* %tmp1, i64* %tmp2), !callees ![[MD:[0-9]+]]
+; CHECK: ![[MD]] = !{i1 (i64*, i64*)* @ugt, i1 (i64*, i64*)* @ule}
+;
+define void @test_argument(i64* %x, i64 %n, i1 %flag) {
+entry:
+  %tmp0 = sub i64 %n, 1
+  br i1 %flag, label %then, label %else
+
+then:
+  call void @arrange_data(i64* %x, i64 %tmp0, i1 (i64*, i64*)* @ugt)
+  br label %merge
+
+else:
+  call void @arrange_data(i64* %x, i64 %tmp0, i1 (i64*, i64*)* @ule)
+  br label %merge
+
+merge:
+  ret void
+}
+
+define internal void @arrange_data(i64* %x, i64 %n, i1 (i64*, i64*)* %cmp) {
+entry:
+  %tmp0 = icmp eq i64 %n, 1
+  br i1 %tmp0, label %merge, label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %cmp.false ]
+  %i.next = add nuw nsw i64 %i, 1
+  %tmp1 = getelementptr inbounds i64, i64* %x, i64 %i
+  %tmp2 = getelementptr inbounds i64, i64* %x, i64 %i.next
+  %tmp3 = call i1 %cmp(i64* %tmp1, i64* %tmp2)
+  br i1 %tmp3, label %cmp.true, label %cmp.false
+
+cmp.true:
+  call void @swap(i64* %tmp1, i64* %tmp2)
+  br label %cmp.false
+
+cmp.false:
+  %cond = icmp slt i64 %i.next, %n
+  br i1 %cond, label %for.body, label %for.end
+
+for.end:
+  %tmp4 = sub i64 %n, 1
+  call void @arrange_data(i64* %x, i64 %tmp4, i1 (i64*, i64*)* %cmp)
+  br label %merge
+
+merge:
+  ret void
+}
+
+define internal i1 @ugt(i64* %a, i64* %b) {
+entry:
+  %tmp0 = load i64, i64* %a
+  %tmp1 = load i64, i64* %b
+  %tmp2 = icmp ugt i64 %tmp0, %tmp1
+  ret i1 %tmp2
+}
+
+define internal i1 @ule(i64* %a, i64* %b) {
+entry:
+  %tmp0 = load i64, i64* %a
+  %tmp1 = load i64, i64* %b
+  %tmp2 = icmp ule i64 %tmp0, %tmp1
+  ret i1 %tmp2
+}
+
+declare void @swap(i64*, i64*)
diff --git a/llvm/test/Transforms/CalledValuePropagation/simple-memory.ll b/llvm/test/Transforms/CalledValuePropagation/simple-memory.ll
new file mode 100644
index 00000000000..e42f10c1436
--- /dev/null
+++ b/llvm/test/Transforms/CalledValuePropagation/simple-memory.ll
@@ -0,0 +1,62 @@
+; RUN: opt -called-value-propagation -S < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnueabi"
+
+@global_function = internal unnamed_addr global void ()* null, align 8
+@global_array = common unnamed_addr global i64* null, align 8
+
+; This test checks that we propagate the functions through an internal global
+; variable, and attach !callees metadata to the call. Such metadata can enable
+; optimizations of this code sequence.
+;
+; For example, since both of the targeted functions have the "nounwind" and
+; "readnone" function attributes, LICM can be made to move the call and the
+; function pointer load outside the loop. This would then enable the loop
+; vectorizer to vectorize the sum reduction.
+;
+; CHECK: call void %tmp0(), !callees ![[MD:[0-9]+]]
+; CHECK: ![[MD]] = !{void ()* @invariant_1, void ()* @invariant_2}
+;
+define i64 @test_memory_entry(i64 %n, i1 %flag) {
+entry:
+  br i1 %flag, label %then, label %else
+
+then:
+  store void ()* @invariant_1, void ()** @global_function
+  br label %merge
+
+else:
+  store void ()* @invariant_2, void ()** @global_function
+  br label %merge
+
+merge:
+  %tmp1 = call i64 @test_memory(i64 %n)
+  ret i64 %tmp1
+}
+
+define internal i64 @test_memory(i64 %n) {
+entry:
+  %array = load i64*, i64** @global_array
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+  %r = phi i64 [ 0, %entry ], [ %tmp3, %for.body ]
+  %tmp0 = load void ()*, void ()** @global_function
+  call void %tmp0()
+  %tmp1 = getelementptr inbounds i64, i64* %array, i64 %i
+  %tmp2 = load i64, i64* %tmp1
+  %tmp3 = add i64 %tmp2, %r
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp slt i64 %i.next, %n
+  br i1 %cond, label %for.body, label %for.end
+
+for.end:
+  %tmp4 = phi i64 [ %tmp3, %for.body ]
+  ret i64 %tmp4
+}
+
+declare void @invariant_1() #0
+declare void @invariant_2() #0
+
+attributes #0 = { nounwind readnone }
diff --git a/llvm/test/Transforms/CalledValuePropagation/simple-select.ll b/llvm/test/Transforms/CalledValuePropagation/simple-select.ll
new file mode 100644
index 00000000000..3d6c7dad7c8
--- /dev/null
+++ b/llvm/test/Transforms/CalledValuePropagation/simple-select.ll
@@ -0,0 +1,39 @@
+; RUN: opt -called-value-propagation -S < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnueabi"
+
+@global_function = internal unnamed_addr global void ()* null, align 8
+@global_scalar = internal unnamed_addr global i64 zeroinitializer
+
+; This test checks that we propagate the functions through a select
+; instruction, and attach !callees metadata to the call. Such metadata can
+; enable optimizations of this code sequence.
+;
+; For example, since both of the targeted functions have the "norecurse"
+; attribute, the function attributes pass can be made to infer that
+; "@test_select" is also norecurse. This would allow the globals optimizer to
+; localize "@global_scalar". The function could then be further simplified to
+; always return the constant "1", eliminating the load and store instructions.
+;
+; CHECK: call void %tmp0(), !callees ![[MD:[0-9]+]]
+; CHECK: ![[MD]] = !{void ()* @norecurse_1, void ()* @norecurse_2}
+;
+define i64 @test_select_entry(i1 %flag) {
+entry:
+  %tmp0 = call i64 @test_select(i1 %flag)
+  ret i64 %tmp0
+}
+
+define internal i64 @test_select(i1 %flag) {
+entry:
+  %tmp0 = select i1 %flag, void ()* @norecurse_1, void ()* @norecurse_2
+  store i64 1, i64* @global_scalar
+  call void %tmp0()
+  %tmp1 = load i64, i64* @global_scalar
+  ret i64 %tmp1
+}
+
+declare void @norecurse_1() #0
+declare void @norecurse_2() #0
+
+attributes #0 = { norecurse }
author	Matthew Simpson <mssimpso@codeaurora.org>	2017-10-25 13:40:08 +0000
committer	Matthew Simpson <mssimpso@codeaurora.org>	2017-10-25 13:40:08 +0000
commit	cb58558c2f680199ae7d9085c5f58d5e127e6f57 (patch)
tree	460a2ada335c943a548094a25f65209bbaec4dd8 /llvm/test
parent	7af3edc4f4f19f0b216378b8e8c8677ce8dbafa2 (diff)
download	bcm5719-llvm-cb58558c2f680199ae7d9085c5f58d5e127e6f57.tar.gz bcm5719-llvm-cb58558c2f680199ae7d9085c5f58d5e127e6f57.zip