summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTobias Grosser <tobias@grosser.es>2015-09-30 13:36:54 +0000
committerTobias Grosser <tobias@grosser.es>2015-09-30 13:36:54 +0000
commitaff56c8a788f46bb988d0c7e934783cefe88fff8 (patch)
tree25f5808884cbfee6b71f73321a88627107b94850
parent87e1da6205a615a0fd9697fed63a35ec27655200 (diff)
downloadbcm5719-llvm-aff56c8a788f46bb988d0c7e934783cefe88fff8.tar.gz
bcm5719-llvm-aff56c8a788f46bb988d0c7e934783cefe88fff8.zip
Reapply "BlockGenerator: Generate synthesisable instructions only on-demand"
Instructions which we can synthesize from a SCEV expression are not generated directly, but only when they are used as an operand of another instruction. This avoids generating unnecessary instructions and works more reliably than first inserting them and then deleting them later on. This commit was reverted in r248860 due to a remaining miscompile, where we forgot to synthesize the operand values that were referenced from scalar writes. test/Isl/CodeGen/scalar-store-from-same-bb.ll tests that we now do this correctly. llvm-svn: 248900
-rw-r--r--polly/include/polly/CodeGen/BlockGenerators.h19
-rw-r--r--polly/lib/CodeGen/BlockGenerators.cpp63
-rw-r--r--polly/test/Isl/CodeGen/MemAccess/different_types.ll8
-rw-r--r--polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll8
-rw-r--r--polly/test/Isl/CodeGen/exprModDiv.ll12
-rw-r--r--polly/test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll1
-rw-r--r--polly/test/Isl/CodeGen/scalar-store-from-same-bb.ll32
7 files changed, 80 insertions, 63 deletions
diff --git a/polly/include/polly/CodeGen/BlockGenerators.h b/polly/include/polly/CodeGen/BlockGenerators.h
index 25187b0db3e..0b6885761a5 100644
--- a/polly/include/polly/CodeGen/BlockGenerators.h
+++ b/polly/include/polly/CodeGen/BlockGenerators.h
@@ -378,9 +378,13 @@ protected:
///
/// @param Stmt The statement we generate code for.
/// @param BB The basic block we generate code for.
+ /// @param LTS A mapping from loops virtual canonical induction
+ /// variable to their new values
+ /// (for values recalculated in the new ScoP, but not
+ /// within this basic block)
/// @param BBMap A mapping from old values to their new values in this block.
virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
- ValueMapT &BBMAp);
+ LoopToScevMapT &LTS, ValueMapT &BBMap);
/// @brief Handle users of @p Inst outside the SCoP.
///
@@ -526,12 +530,17 @@ protected:
///
/// @param ScalarValue The original value needed.
/// @param R The current SCoP region.
+ /// @param Stmt The ScopStmt in which we look up this value.
+ /// @param LTS A mapping from loops virtual canonical induction
+ /// variable to their new values
+ /// (for values recalculated in the new ScoP, but not
+ /// within this basic block)
/// @param BBMap A mapping from old values to their new values
/// (for values recalculated within this basic block).
///
/// @returns The newest version (e.g., reloaded) of the scalar value.
- Value *getNewScalarValue(Value *ScalarValue, const Region &R,
- ValueMapT &BBMap);
+ Value *getNewScalarValue(Value *ScalarValue, const Region &R, ScopStmt &,
+ LoopToScevMapT &LTS, ValueMapT &BBMap);
};
/// @brief Generate a new vector basic block for a polyhedral statement.
@@ -768,8 +777,12 @@ private:
///
/// @param Stmt The statement we generate code for.
/// @param BB The basic block we generate code for.
+ /// @param LTS A mapping from loops virtual canonical induction variable to
+ /// their new values (for values recalculated in the new ScoP,
+ /// but not within this basic block)
/// @param BBMap A mapping from old values to their new values in this block.
virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
+ LoopToScevMapT &LTS,
ValueMapT &BBMAp) override;
/// @brief Copy a single PHI instruction.
diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp
index 652ad8b5dec..94163c3c4bc 100644
--- a/polly/lib/CodeGen/BlockGenerators.cpp
+++ b/polly/lib/CodeGen/BlockGenerators.cpp
@@ -285,8 +285,7 @@ void BlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst,
Loop *L = getLoopForInst(Inst);
if ((Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) &&
canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion())) {
- Value *NewValue = getNewValue(Stmt, Inst, BBMap, LTS, L);
- BBMap[Inst] = NewValue;
+ // Synthesizable statements will be generated on-demand.
return;
}
@@ -316,28 +315,6 @@ void BlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst,
copyInstScalar(Stmt, Inst, BBMap, LTS);
}
-/// @brief Remove trivially dead instructions from BB
-///
-/// This function drops trivially dead instructions from a basic block. It
-/// on purpose does _not_ recurse into other BBs even if the deletion of
-/// instructions in this basic block can make instructions in other basic blocks
-/// triviall dead.
-static void simplifyInstsInBlockOnly(BasicBlock *BB) {
- auto BI = --BB->end(), BE = BB->begin();
- bool Exit = false;
- while (!Exit) {
- auto ToRemove = BI;
- if (BI != BE)
- BI--;
- else
- Exit = true;
-
- if (!isInstructionTriviallyDead(ToRemove))
- continue;
- ToRemove->eraseFromParent();
- }
-}
-
void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
isl_id_to_ast_expr *NewAccesses) {
assert(Stmt.isBlockStmt() &&
@@ -347,16 +324,6 @@ void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
BasicBlock *BB = Stmt.getBasicBlock();
copyBB(Stmt, BB, BBMap, LTS, NewAccesses);
-
- auto CopyBB = Builder.GetInsertBlock();
- // Delete trivially dead instructions in CopyBB, but not in any other BB.
- // Only for copyBB we know that there will _never_ be any future uses of
- // instructions that have no use after copyBB has finished. Other instructions
- // in the AST that have been generated by IslNodeBuilder may look dead at
- // the moment, but may possibly still be referenced by GlobalMaps. If we
- // delete them now, later uses would break surprisingly.
- simplifyInstsInBlockOnly(CopyBB);
- Builder.SetInsertPoint(CopyBB->getTerminator());
}
BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) {
@@ -386,7 +353,7 @@ void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB,
// After a basic block was copied store all scalars that escape this block
// in their alloca. First the scalars that have dependences inside the SCoP,
// then the ones that might escape the SCoP.
- generateScalarStores(Stmt, BB, BBMap);
+ generateScalarStores(Stmt, BB, LTS, BBMap);
const Region &R = Stmt.getParent()->getRegion();
for (Instruction &Inst : *BB)
@@ -481,6 +448,7 @@ void BlockGenerator::generateScalarLoads(ScopStmt &Stmt,
}
Value *BlockGenerator::getNewScalarValue(Value *ScalarValue, const Region &R,
+ ScopStmt &Stmt, LoopToScevMapT &LTS,
ValueMapT &BBMap) {
// If the value we want to store is an instruction we might have demoted it
// in order to make it accessible here. In such a case a reload is
@@ -509,6 +477,16 @@ Value *BlockGenerator::getNewScalarValue(Value *ScalarValue, const Region &R,
if (Value *ScalarValueCopy = BBMap.lookup(ScalarValueInst))
return /* Case (3a) */ ScalarValueCopy;
+ if ((Stmt.isBlockStmt() &&
+ Stmt.getBasicBlock() == ScalarValueInst->getParent()) ||
+ (Stmt.isRegionStmt() && Stmt.getRegion()->contains(ScalarValueInst))) {
+ auto SynthesizedValue = trySynthesizeNewValue(
+ Stmt, ScalarValueInst, BBMap, LTS, getLoopForInst(ScalarValueInst));
+
+ if (SynthesizedValue)
+ return SynthesizedValue;
+ }
+
// Case (3b)
Value *Address = getOrCreateScalarAlloca(ScalarValueInst);
ScalarValue = Builder.CreateLoad(Address, Address->getName() + ".reload");
@@ -517,6 +495,7 @@ Value *BlockGenerator::getNewScalarValue(Value *ScalarValue, const Region &R,
}
void BlockGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
+ LoopToScevMapT &LTS,
ValueMapT &BBMap) {
const Region &R = Stmt.getParent()->getRegion();
@@ -531,7 +510,7 @@ void BlockGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
Value *Val = MA->getAccessValue();
auto *Address = getOrCreateAlloca(*MA);
- Val = getNewScalarValue(Val, R, BBMap);
+ Val = getNewScalarValue(Val, R, Stmt, LTS, BBMap);
Builder.CreateStore(Val, Address);
}
}
@@ -1153,15 +1132,6 @@ void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
LTS[L] = SE.getUnknown(LoopPHI);
}
- // Delete trivially dead instructions in CopyBB, but not in any other BB.
- // Only for copyBB we know that there will _never_ be any future uses of
- // instructions that have no use after copyBB has finished. Other instructions
- // in the AST that have been generated by IslNodeBuilder may look dead at
- // the moment, but may possibly still be referenced by GlobalMaps. If we
- // delete them now, later uses would break surprisingly.
- for (auto *BB : SeenBlocks)
- simplifyInstsInBlockOnly(BlockMap[BB]);
-
// Reset the old insert point for the build.
Builder.SetInsertPoint(ExitBBCopy->begin());
}
@@ -1181,6 +1151,7 @@ void RegionGenerator::generateScalarLoads(ScopStmt &Stmt,
}
void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
+ LoopToScevMapT &LTS,
ValueMapT &BBMap) {
const Region &R = Stmt.getParent()->getRegion();
@@ -1203,7 +1174,7 @@ void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
auto Address = getOrCreateAlloca(*MA);
- Val = getNewScalarValue(Val, R, BBMap);
+ Val = getNewScalarValue(Val, R, Stmt, LTS, BBMap);
Builder.CreateStore(Val, Address);
}
}
diff --git a/polly/test/Isl/CodeGen/MemAccess/different_types.ll b/polly/test/Isl/CodeGen/MemAccess/different_types.ll
index ffd5bb48610..a45660784e6 100644
--- a/polly/test/Isl/CodeGen/MemAccess/different_types.ll
+++ b/polly/test/Isl/CodeGen/MemAccess/different_types.ll
@@ -9,10 +9,10 @@
; A[i] += 10;
; }
-; CHECK: %polly.access.cast.A1 = bitcast float* %A to i32*
-; CHECK: %5 = sub nsw i64 99, %polly.indvar15
-; CHECK: %polly.access.A19 = getelementptr i32, i32* %polly.access.cast.A18, i64 %5
-; CHECK: %6 = bitcast i32* %polly.access.A19 to float*
+; CHECK: %polly.access.cast.A14 = bitcast float* %A to i32*
+; CHECK: %5 = sub nsw i64 99, %polly.indvar11
+; CHECK: %polly.access.A15 = getelementptr i32, i32* %polly.access.cast.A14, i64 %5
+; CHECK: %6 = bitcast i32* %polly.access.A15 to float*
; CHECK: %tmp14_p_scalar_ = load float, float* %6, align 4, !alias.scope !3, !noalias !4
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll b/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll
index f0b296daf75..622db9b99ac 100644
--- a/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll
+++ b/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll
@@ -25,11 +25,11 @@
; IR: %polly.access.polly.subfunc.arg.A = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A
; IR: %tmp10_p_scalar_ = load float, float* %polly.access.polly.subfunc.arg.A, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_loop_access !3
-; IR: %polly.access.mul.polly.subfunc.arg.A9 = mul i64 %polly.indvar, %polly.subfunc.arg.m
+; IR: %polly.access.mul.polly.subfunc.arg.A8 = mul i64 %polly.indvar, %polly.subfunc.arg.m
; IR: %7 = add nsw i64 %polly.indvar5, 43
-; IR: %polly.access.add.polly.subfunc.arg.A10 = add i64 %polly.access.mul.polly.subfunc.arg.A9, %7
-; IR: %polly.access.polly.subfunc.arg.A11 = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A10
-; IR: store float %p_tmp11, float* %polly.access.polly.subfunc.arg.A11, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_
+; IR: %polly.access.add.polly.subfunc.arg.A9 = add i64 %polly.access.mul.polly.subfunc.arg.A8, %7
+; IR: %polly.access.polly.subfunc.arg.A10 = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A9
+; IR: store float %p_tmp11, float* %polly.access.polly.subfunc.arg.A10, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @new_multidim_access(i64 %n, i64 %m, float* %A) {
diff --git a/polly/test/Isl/CodeGen/exprModDiv.ll b/polly/test/Isl/CodeGen/exprModDiv.ll
index ce59ad8fdf5..5991fac2019 100644
--- a/polly/test/Isl/CodeGen/exprModDiv.ll
+++ b/polly/test/Isl/CodeGen/exprModDiv.ll
@@ -28,7 +28,7 @@
; each value of i to indeed be mapped to a value.
;
; CHECK: %pexp.p_div_q = udiv i64 %polly.indvar, 127
-; CHECK: %polly.access.B8 = getelementptr float, float* %B, i64 %pexp.p_div_q
+; CHECK: %polly.access.B7 = getelementptr float, float* %B, i64 %pexp.p_div_q
; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
; A[p + 127 * floord(-p - 1, 127) + 127]
@@ -42,11 +42,11 @@
; CHECK: %19 = mul nsw i64 127, %pexp.fdiv_q.4
; CHECK: %20 = add nsw i64 %p, %19
; CHECK: %21 = add nsw i64 %20, 127
-; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %21
+; CHECK: %polly.access.A8 = getelementptr float, float* %A, i64 %21
; A[p / 127]
; CHECK: %pexp.div = sdiv exact i64 %p, 127
-; CHECK: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div
+; CHECK: %polly.access.B9 = getelementptr float, float* %B, i64 %pexp.div
; A[i % 128]
; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128
@@ -54,7 +54,7 @@
; A[floor(i / 128)]
; POW2: %pexp.p_div_q = udiv i64 %polly.indvar, 128
-; POW2: %polly.access.B8 = getelementptr float, float* %B, i64 %pexp.p_div_q
+; POW2: %polly.access.B7 = getelementptr float, float* %B, i64 %pexp.p_div_q
; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
; A[p + 128 * floord(-p - 1, 128) + 128]
@@ -64,11 +64,11 @@
; POW2: %19 = mul nsw i64 128, %polly.fdiv_q.shr
; POW2: %20 = add nsw i64 %p, %19
; POW2: %21 = add nsw i64 %20, 128
-; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %21
+; POW2: %polly.access.A8 = getelementptr float, float* %A, i64 %21
; A[p / 128]
; POW2: %pexp.div = sdiv exact i64 %p, 128
-; POW2: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div
+; POW2: %polly.access.B9 = getelementptr float, float* %B, i64 %pexp.div
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll b/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll
index 61913cb09ef..59852f00932 100644
--- a/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll
+++ b/polly/test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll
@@ -13,6 +13,7 @@ loop:
br i1 %cond0, label %branch1, label %backedge
; CHECK-LABEL: polly.stmt.loop:
+; CHECK-NEXT: %polly.subregion.iv = phi i32 [ 0, %polly.stmt.loop.entry ]
; CHECK-NEXT: %p_val0 = fadd float 1.000000e+00, 2.000000e+00
; CHECK-NEXT: %p_val1 = fadd float 1.000000e+00, 2.000000e+00
; CHECK-NEXT: %p_val2 = fadd float 1.000000e+00, 2.000000e+00
diff --git a/polly/test/Isl/CodeGen/scalar-store-from-same-bb.ll b/polly/test/Isl/CodeGen/scalar-store-from-same-bb.ll
new file mode 100644
index 00000000000..1653562d1e1
--- /dev/null
+++ b/polly/test/Isl/CodeGen/scalar-store-from-same-bb.ll
@@ -0,0 +1,32 @@
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit \
+; RUN: -polly-codegen -S < %s | FileCheck %s
+
+; This test ensures that the expression N + 1 that is stored in the phi-node
+; alloca, is directly computed and not incorrectly transfered through memory.
+
+; CHECK: store i64 %2, i64* %res.phiops
+; CHECK: %2 = add i64 %N, 1
+
+define i64 @foo(float* %A, i64 %N) {
+entry:
+ br label %next
+
+next:
+ %cond = icmp eq i64 %N, 0
+ br i1 %cond, label %loop, label %merge
+
+loop:
+ %indvar = phi i64 [0, %next], [%indvar.next, %loop]
+ %indvar.next = add i64 %indvar, 1
+ %sum = add i64 %N, 1
+ store float 4.0, float* %A
+ %cmp = icmp sle i64 %indvar.next, 100
+ br i1 %cmp, label %loop, label %merge
+
+merge:
+ %res = phi i64 [%sum, %loop], [0, %next]
+ br label %exit
+
+exit:
+ ret i64 %res
+}
OpenPOWER on IntegriCloud