[LoopInstSimplify] Re-implement the core logic of loop-instsimplify to

be both simpler and substantially more efficient. Rather than use a hand-rolled iteration technique that isn't quite the same as RPO, use the pre-built RPO loop body traversal utility. Once visiting the loop body in RPO, we can assert that we visit defs before uses reliably. When this is the case, the only need to iterate is when simplifying a def that is used by a PHI node along a back-edge. With this patch, the first pass over the loop body is just a complete simplification of every instruction across the loop body. When we encounter a use of a simplified instruction that stems from a PHI node in the loop body that has already been visited (due to some cyclic CFG, potentially the loop itself, or a nested loop, or unstructured control flow), we recall that specific PHI node for the second iteration. Nothing else needs to be preserved from iteration to iteration. On the second and later iterations, only instructions known to have simplified inputs are considered, each time starting from a set of PHIs that had simplified inputs along the backedges. Dead instructions are collected along the way, but deleted in a batch at the end of each iteration making the iterations themselves substantially simpler. This uses a new batch API for recursively deleting dead instructions. This alsa changes the routine to visit subloops. Because simplification is fundamentally transitive, we may need to visit the entire loop body, including subloops, to handle knock-on simplification. I've added a basic test file that helps demonstrate that all of these changes work. It includes both straight-forward loops with simplifications as well as interesting PHI-structures, CFG-structures, and a nested loop case. Differential Revision: https://reviews.llvm.org/D47407 llvm-svn: 333461
author: Chandler Carruth <chandlerc@gmail.com> 2018-05-29 20:15:38 +0000
committer: Chandler Carruth <chandlerc@gmail.com> 2018-05-29 20:15:38 +0000
commit: 4cbcbb0761325099ff63927ef8bf36e97dc43c7f (patch)
tree: c297618bc6943669c145e6df20ef21e2130e95ee /llvm/test/Transforms/LoopInstSimplify/basic.ll
parent: 5439b3d1e5b62892651b82417d8adca10b4651a7 (diff)
download: bcm5719-llvm-4cbcbb0761325099ff63927ef8bf36e97dc43c7f.tar.gz
bcm5719-llvm-4cbcbb0761325099ff63927ef8bf36e97dc43c7f.zip
1 files changed, 164 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopInstSimplify/basic.ll b/llvm/test/Transforms/LoopInstSimplify/basic.ll
new file mode 100644
index 00000000000..7e6cafd8557
--- /dev/null
+++ b/llvm/test/Transforms/LoopInstSimplify/basic.ll
@@ -0,0 +1,164 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S %s -passes=loop-instsimplify | FileCheck %s
+
+; Test very basic folding and propagation occurs within a loop body. This should
+; collapse to the loop iteration structure and the LCSSA PHI node.
+define i32 @test1(i32 %n, i32 %x) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add nsw i32 [[I]], 1
+; CHECK-NEXT:    [[I_CMP:%.*]] = icmp slt i32 [[I_NEXT]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[I_CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[X_LCSSA:%.*]] = phi i32 [ [[X:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    ret i32 [[X_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %x.add = add nsw i32 %x, 0
+  %x.sub = sub i32 %x.add, 0
+  %x.and = and i32 %x.sub, -1
+  %i.next = add nsw i32 %i, 1
+  %i.cmp = icmp slt i32 %i.next, %n
+  br i1 %i.cmp, label %loop, label %exit
+
+exit:
+  %x.lcssa = phi i32 [ %x.and, %loop ]
+  ret i32 %x.lcssa
+}
+
+; Test basic loop structure that still has a simplification feed a prior PHI.
+define i32 @test2(i32 %n, i32 %x) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add nsw i32 [[I]], 1
+; CHECK-NEXT:    [[I_CMP:%.*]] = icmp slt i32 [[I_NEXT]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[I_CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[X_LCSSA:%.*]] = phi i32 [ [[X:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    ret i32 [[X_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %x.loop = phi i32 [ %x, %entry ], [ %x.next, %loop ]
+  %x.next = add nsw i32 %x.loop, 0
+  %i.next = add nsw i32 %i, 1
+  %i.cmp = icmp slt i32 %i.next, %n
+  br i1 %i.cmp, label %loop, label %exit
+
+exit:
+  %x.lcssa = phi i32 [ %x.loop, %loop ]
+  ret i32 %x.lcssa
+}
+
+; Test a diamond CFG with inner PHI nodes.
+define i32 @test3(i32 %n, i32 %x) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[X_CMP:%.*]] = icmp slt i32 [[I]], 42
+; CHECK-NEXT:    br i1 [[X_CMP]], label [[LOOP_LHS:%.*]], label [[LOOP_RHS:%.*]]
+; CHECK:       loop.lhs:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.rhs:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[I_NEXT]] = add nsw i32 [[I]], 1
+; CHECK-NEXT:    [[I_CMP:%.*]] = icmp slt i32 [[I_NEXT]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[I_CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[X_LCSSA:%.*]] = phi i32 [ [[X:%.*]], [[LOOP_LATCH]] ]
+; CHECK-NEXT:    ret i32 [[X_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop.latch ]
+  %x.loop = phi i32 [ %x, %entry ], [ %x.phi, %loop.latch ]
+  %x.add = add nsw i32 %x.loop, 0
+  %x.cmp = icmp slt i32 %i, 42
+  br i1 %x.cmp, label %loop.lhs, label %loop.rhs
+
+loop.lhs:
+  %x.l.add = add nsw i32 %x.add, 0
+  br label %loop.latch
+
+loop.rhs:
+  %x.r.sub = sub nsw i32 %x.add, 0
+  br label %loop.latch
+
+loop.latch:
+  %x.phi = phi i32 [ %x.l.add, %loop.lhs ], [ %x.r.sub, %loop.rhs ]
+  %i.next = add nsw i32 %i, 1
+  %i.cmp = icmp slt i32 %i.next, %n
+  br i1 %i.cmp, label %loop, label %exit
+
+exit:
+  %x.lcssa = phi i32 [ %x.loop, %loop.latch ]
+  ret i32 %x.lcssa
+}
+
+; Test an inner loop that is only simplified when processing the outer loop, and
+; an outer loop only simplified when processing the inner loop.
+define i32 @test4(i32 %n, i32 %m, i32 %x) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP_INNER:%.*]]
+; CHECK:       loop.inner:
+; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[J_NEXT:%.*]], [[LOOP_INNER]] ]
+; CHECK-NEXT:    [[J_NEXT]] = add nsw i32 [[J]], 1
+; CHECK-NEXT:    [[J_CMP:%.*]] = icmp slt i32 [[J_NEXT]], [[M:%.*]]
+; CHECK-NEXT:    br i1 [[J_CMP]], label [[LOOP_INNER]], label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[I_NEXT]] = add nsw i32 [[I]], 1
+; CHECK-NEXT:    [[I_CMP:%.*]] = icmp slt i32 [[I_NEXT]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[I_CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[X_LCSSA:%.*]] = phi i32 [ [[X:%.*]], [[LOOP_LATCH]] ]
+; CHECK-NEXT:    ret i32 [[X_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop.latch ]
+  %x.loop = phi i32 [ %x, %entry ], [ %x.inner.lcssa, %loop.latch ]
+  %x.add = add nsw i32 %x.loop, 0
+  br label %loop.inner
+
+loop.inner:
+  %j = phi i32 [ 0, %loop ], [ %j.next, %loop.inner ]
+  %x.inner.loop = phi i32 [ %x.add, %loop ], [ %x.inner.add, %loop.inner ]
+  %x.inner.add = add nsw i32 %x.inner.loop, 0
+  %j.next = add nsw i32 %j, 1
+  %j.cmp = icmp slt i32 %j.next, %m
+  br i1 %j.cmp, label %loop.inner, label %loop.latch
+
+loop.latch:
+  %x.inner.lcssa = phi i32 [ %x.inner.loop, %loop.inner ]
+  %i.next = add nsw i32 %i, 1
+  %i.cmp = icmp slt i32 %i.next, %n
+  br i1 %i.cmp, label %loop, label %exit
+
+exit:
+  %x.lcssa = phi i32 [ %x.loop, %loop.latch ]
+  ret i32 %x.lcssa
+}
author	Chandler Carruth <chandlerc@gmail.com>	2018-05-29 20:15:38 +0000
committer	Chandler Carruth <chandlerc@gmail.com>	2018-05-29 20:15:38 +0000
commit	4cbcbb0761325099ff63927ef8bf36e97dc43c7f (patch)
tree	c297618bc6943669c145e6df20ef21e2130e95ee /llvm/test/Transforms/LoopInstSimplify/basic.ll
parent	5439b3d1e5b62892651b82417d8adca10b4651a7 (diff)
download	bcm5719-llvm-4cbcbb0761325099ff63927ef8bf36e97dc43c7f.tar.gz bcm5719-llvm-4cbcbb0761325099ff63927ef8bf36e97dc43c7f.zip