summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Kruse <llvm@meinersbur.de>2017-07-31 19:46:21 +0000
committerMichael Kruse <llvm@meinersbur.de>2017-07-31 19:46:21 +0000
commit9f6e41cdbabd7cfcfe7096544a61bb41dfcef746 (patch)
treecf376519a1bae28232a2620196a48237608e56f2
parent8d927b6bf908503e7d2f7aca22d6fec2a15fd44b (diff)
downloadbcm5719-llvm-9f6e41cdbabd7cfcfe7096544a61bb41dfcef746.tar.gz
bcm5719-llvm-9f6e41cdbabd7cfcfe7096544a61bb41dfcef746.zip
[ForwardOpTree] Support synthesizable values.
This allows -polly-optree to move instructions that depend on synthesizable values. The difficulty for synthesizable values is that their value depends on the location. When it is moved over a loop header, and the SCEV expression depends on the loop induction variable (SCEVAddRecExpr), it would use the current induction variable instead of the last one. At the moment we cannot forward PHI nodes such that crossing the header of loops referenced by SCEVAddRecExpr is not possible (assuming the loop header has at least two incoming blocks: for entering the loop and the backedge, such any instruction to be forwarded must have a phi between use and definition). A remaining issue is when the forwarded value is used after the loop, but is only synthesizable inside the loop. This happens e.g. if ScalarEvolution is unable to determine the number of loop iterations or the initial loop value. We do not forward in this situation. Differential Revision: https://reviews.llvm.org/D36102 llvm-svn: 309609
-rw-r--r--polly/lib/Transform/ForwardOpTree.cpp40
-rw-r--r--polly/test/ForwardOpTree/forward_synthesizable_definloop.ll80
-rw-r--r--polly/test/ForwardOpTree/forward_synthesizable_indvar.ll62
-rw-r--r--polly/test/ForwardOpTree/forward_synthesizable_useinloop.ll80
-rw-r--r--polly/test/ForwardOpTree/noforward_synthesizable.ll43
-rw-r--r--polly/test/ForwardOpTree/noforward_synthesizable_unknownit.ll50
6 files changed, 302 insertions, 53 deletions
diff --git a/polly/lib/Transform/ForwardOpTree.cpp b/polly/lib/Transform/ForwardOpTree.cpp
index 81ace39b8de..852fc188542 100644
--- a/polly/lib/Transform/ForwardOpTree.cpp
+++ b/polly/lib/Transform/ForwardOpTree.cpp
@@ -140,13 +140,6 @@ private:
ForwardingDecision canForwardTree(ScopStmt *TargetStmt, Value *UseVal,
ScopStmt *UseStmt, Loop *UseLoop,
bool DoIt) {
-
- // PHis are not yet supported.
- if (isa<PHINode>(UseVal)) {
- DEBUG(dbgs() << " Cannot forward PHI: " << *UseVal << "\n");
- return FD_CannotForward;
- }
-
VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
switch (VUse.getKind()) {
case VirtualUse::Constant:
@@ -157,10 +150,31 @@ private:
return FD_DidForward;
return FD_CanForwardLeaf;
- case VirtualUse::Synthesizable:
- // Not supported yet.
- DEBUG(dbgs() << " Cannot forward synthesizable: " << *UseVal << "\n");
+ case VirtualUse::Synthesizable: {
+ // ScopExpander will take care for of generating the code at the new
+ // location.
+ if (DoIt)
+ return FD_DidForward;
+
+ // Check if the value is synthesizable at the new location as well. This
+ // might be possible when leaving a loop for which ScalarEvolution is
+ // unable to derive the exit value for.
+ // TODO: If there is a LCSSA PHI at the loop exit, use that one.
+ // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
+ // do not forward past its loop header. This would require us to use a
+ // previous loop induction variable instead the current one. We currently
+ // do not allow forwarding PHI nodes, thus this should never occur (the
+ // only exception where no phi is necessary being an unreachable loop
+ // without edge from the outside).
+ VirtualUse TargetUse = VirtualUse::create(
+ S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
+ if (TargetUse.getKind() == VirtualUse::Synthesizable)
+ return FD_CanForwardLeaf;
+
+ DEBUG(dbgs() << " Synthesizable would not be synthesizable anymore: "
+ << *UseVal << "\n");
return FD_CannotForward;
+ }
case VirtualUse::ReadOnly:
// Note that we cannot return FD_CanForwardTree here. With a operand tree
@@ -185,6 +199,12 @@ private:
case VirtualUse::Inter:
auto Inst = cast<Instruction>(UseVal);
+ // PHIs, unless synthesizable, are not yet supported.
+ if (isa<PHINode>(Inst)) {
+ DEBUG(dbgs() << " Cannot forward PHI: " << *UseVal << "\n");
+ return FD_CannotForward;
+ }
+
// Compatible instructions must satisfy the following conditions:
// 1. Idempotent (instruction will be copied, not moved; although its
// original instance might be removed by simplification)
diff --git a/polly/test/ForwardOpTree/forward_synthesizable_definloop.ll b/polly/test/ForwardOpTree/forward_synthesizable_definloop.ll
new file mode 100644
index 00000000000..05d375a9a75
--- /dev/null
+++ b/polly/test/ForwardOpTree/forward_synthesizable_definloop.ll
@@ -0,0 +1,80 @@
+; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+; Copy %val to bodyB, assuming the exit value of %i.
+;
+; for (int j = 0; j < n; j += 1) {
+; double val;
+; for (int i = 0; i < 128; i += 1) {
+; bodyA:
+; val = j;
+; }
+;
+; bodyB:
+; A[0] = val;
+; }
+;
+define void @func(i32 %n, double* noalias nonnull %A) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %inner.for, label %exit
+
+ inner.for:
+ %i = phi i32 [0, %for], [%i.inc, %inner.inc]
+ br label %bodyA
+
+
+ bodyA:
+ %val = sitofp i32 %i to double
+ br label %inner.inc
+
+
+ inner.inc:
+ %i.inc = add nuw nsw i32 %i, 1
+ %i.cmp = icmp slt i32 %i.inc, 128
+ br i1 %i.cmp, label %inner.for, label %inner.exit
+
+ inner.exit:
+ br label %bodyB
+
+
+ bodyB:
+ store double %val, double* %A
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK: Instructions copied: 1
+; CHECK: Operand trees forwarded: 1
+; CHECK: Statements with forwarded operand trees: 1
+; CHECK: }
+
+; CHECK: After statements {
+; CHECK-NEXT: Stmt_bodyA
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0, i1] -> MemRef_val[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val = sitofp i32 %i to double
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_bodyB
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_A[0] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val = sitofp i32 %i to double
+; CHECK-NEXT: store double %val, double* %A
+; CHECK-NEXT: }
+; CHECK-NEXT: }
diff --git a/polly/test/ForwardOpTree/forward_synthesizable_indvar.ll b/polly/test/ForwardOpTree/forward_synthesizable_indvar.ll
new file mode 100644
index 00000000000..b034cd1a5ae
--- /dev/null
+++ b/polly/test/ForwardOpTree/forward_synthesizable_indvar.ll
@@ -0,0 +1,62 @@
+; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+; Test support for (synthesizable) inducation variables.
+;
+; for (int j = 0; j < n; j += 1) {
+; bodyA:
+; double val = j;
+;
+; bodyB:
+; A[0] = val;
+; }
+;
+define void @func(i32 %n, double* noalias nonnull %A) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %bodyA, label %exit
+
+ bodyA:
+ %val = sitofp i32 %j to double
+ br label %bodyB
+
+ bodyB:
+ store double %val, double* %A
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK: Instructions copied: 1
+; CHECK: Operand trees forwarded: 1
+; CHECK: Statements with forwarded operand trees: 1
+; CHECK: }
+
+; CHECK: After statements {
+; CHECK-NEXT: Stmt_bodyA
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_val[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val = sitofp i32 %j to double
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_bodyB
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_A[0] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val = sitofp i32 %j to double
+; CHECK-NEXT: store double %val, double* %A
+; CHECK-NEXT: }
+; CHECK-NEXT: }
diff --git a/polly/test/ForwardOpTree/forward_synthesizable_useinloop.ll b/polly/test/ForwardOpTree/forward_synthesizable_useinloop.ll
new file mode 100644
index 00000000000..c43787d8a58
--- /dev/null
+++ b/polly/test/ForwardOpTree/forward_synthesizable_useinloop.ll
@@ -0,0 +1,80 @@
+; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+; Synthesizable values defined outside of a loop can be used
+; inside the loop.
+;
+; for (int j = 0; j < n; j += 1) {
+; bodyA:
+; double val = j;
+;
+; for (int i = 0; i < n; i += 1) {
+; bodyB:
+; A[0] = val;
+; }
+; }
+;
+define void @func(i32 %n, double* noalias nonnull %A) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %bodyA, label %exit
+
+ bodyA:
+ %val = sitofp i32 %j to double
+ br label %inner.for
+
+
+ inner.for:
+ %i = phi i32 [0, %bodyA], [%i.inc, %inner.inc]
+ %i.cmp = icmp slt i32 %i, %n
+ br i1 %i.cmp, label %bodyB, label %inner.exit
+
+
+ bodyB:
+ store double %val, double* %A
+ br label %inner.inc
+
+
+ inner.inc:
+ %i.inc = add nuw nsw i32 %i, 1
+ br label %inner.for
+
+ inner.exit:
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: Statistics {
+; CHECK: Instructions copied: 1
+; CHECK: Operand trees forwarded: 1
+; CHECK: Statements with forwarded operand trees: 1
+; CHECK: }
+
+; CHECK: After statements {
+; CHECK-NEXT: Stmt_bodyA
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_val[] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val = sitofp i32 %j to double
+; CHECK-NEXT: }
+; CHECK-NEXT: Stmt_bodyB
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [n] -> { Stmt_bodyB[i0, i1] -> MemRef_A[0] };
+; CHECK-NEXT: Instructions {
+; CHECK-NEXT: %val = sitofp i32 %j to double
+; CHECK-NEXT: store double %val, double* %A
+; CHECK-NEXT: }
+; CHECK-NEXT: }
diff --git a/polly/test/ForwardOpTree/noforward_synthesizable.ll b/polly/test/ForwardOpTree/noforward_synthesizable.ll
deleted file mode 100644
index 593a7029829..00000000000
--- a/polly/test/ForwardOpTree/noforward_synthesizable.ll
+++ /dev/null
@@ -1,43 +0,0 @@
-; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
-;
-; %val has a synthesizable argument that we currently do not support.
-;
-; for (int j = 0; j < n; j += 1) {
-; bodyA:
-; double v = j;
-; double val = 21.0 + 21.0;
-;
-; bodyB:
-; A[0] = val;
-; }
-;
-define void @func(i32 %n, double* noalias nonnull %A) {
-entry:
- br label %for
-
-for:
- %j = phi i32 [0, %entry], [%j.inc, %inc]
- %j.cmp = icmp slt i32 %j, %n
- br i1 %j.cmp, label %bodyA, label %exit
-
- bodyA:
- %val = sitofp i32 %j to double
- br label %bodyB
-
- bodyB:
- store double %val, double* %A
- br label %inc
-
-inc:
- %j.inc = add nuw nsw i32 %j, 1
- br label %for
-
-exit:
- br label %return
-
-return:
- ret void
-}
-
-
-; CHECK: ForwardOpTree executed, but did not modify anything
diff --git a/polly/test/ForwardOpTree/noforward_synthesizable_unknownit.ll b/polly/test/ForwardOpTree/noforward_synthesizable_unknownit.ll
new file mode 100644
index 00000000000..0a7c80a0cd9
--- /dev/null
+++ b/polly/test/ForwardOpTree/noforward_synthesizable_unknownit.ll
@@ -0,0 +1,50 @@
+; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
+;
+; Do not try to forward %i.trunc, it is not synthesizable in %body.
+;
+define void @func(i32 %n, i32* noalias nonnull %A) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ %zero = sext i32 0 to i64
+ br i1 %j.cmp, label %inner.for, label %exit
+
+
+ ; This loop has some unusual properties:
+ ; * It has a known iteration count (8), therefore SCoP-compatible.
+ ; * %i.trunc is synthesizable within the loop ({1,+,1}<%while.body>).
+ ; * %i.trunc is not synthesizable outside of the loop, because its value is
+ ; unknown when exiting.
+ ; (should be 8, but ScalarEvolution currently seems unable to derive that)
+ ;
+ ; ScalarEvolution currently seems to not able to handle the %zero.
+ ; If it becomes more intelligent, there might be other such loop constructs.
+ inner.for:
+ %i = phi i64 [%zero, %for], [%i.inc, %inner.for]
+ %i.inc = add nuw nsw i64 %i, 1
+ %i.trunc = trunc i64 %i.inc to i32
+ %i.and = and i32 %i.trunc, 7
+ %inner.cond = icmp eq i32 %i.and, 0
+ br i1 %inner.cond, label %body, label %inner.for
+
+ body:
+ store i32 %i.trunc, i32* %A
+ br label %inc
+
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+; CHECK: ForwardOpTree executed, but did not modify anything
OpenPOWER on IntegriCloud