summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Johannes Doerfert <doerfert@cs.uni-saarland.de> 2014-10-08 17:25:30 +0000
committer: Johannes Doerfert <doerfert@cs.uni-saarland.de> 2014-10-08 17:25:30 +0000
commit: 731685e6bcf04bf40254430de5356692d7bffa27 (patch)
tree: 5f92dc3f31d9ecc57b3d19fafa39ffbcdee6ce10
parent: 589b36aae7f0bc50711d4f8f9859269aa3cb7f62 (diff)
download: bcm5719-llvm-731685e6bcf04bf40254430de5356692d7bffa27.tar.gz
bcm5719-llvm-731685e6bcf04bf40254430de5356692d7bffa27.zip
Allow the VectorBlockGenerator to use the IslExprBuilder.
This also enables the VectorBlockGenerator to build load and store accesses according to the newAccessRelation of a MemoryAccess.
llvm-svn: 219321
-rw-r--r-- polly/include/polly/CodeGen/BlockGenerators.h 12
-rw-r--r-- polly/lib/CodeGen/BlockGenerators.cpp 27
-rw-r--r-- polly/lib/CodeGen/IslCodeGeneration.cpp 3
-rw-r--r-- polly/test/Isl/CodeGen/MemAccess/simple_analyze.ll 30
4 files changed, 54 insertions, 18 deletions
diff --git a/polly/include/polly/CodeGen/BlockGenerators.h b/polly/include/polly/CodeGen/BlockGenerators.h
index af8ebd7e5e8..c44d2667a74 100644
--- a/polly/include/polly/CodeGen/BlockGenerators.h
+++ b/polly/include/polly/CodeGen/BlockGenerators.h
@@ -209,13 +209,17 @@ public:
/// The pass is needed to update other analysis.
/// @param LI The loop info for the current function
/// @param SE The scalar evolution info for the current function
+ /// @param Build The AST build with the new schedule.
+ /// @param ExprBuilder An expression builder to generate new access functions.
static void generate(PollyIRBuilder &B, ScopStmt &Stmt,
VectorValueMapT &GlobalMaps,
std::vector<LoopToScevMapT> &VLTS,
__isl_keep isl_map *Schedule, Pass *P, LoopInfo &LI,
- ScalarEvolution &SE) {
+ ScalarEvolution &SE,
+ __isl_keep isl_ast_build *Build = nullptr,
+ IslExprBuilder *ExprBuilder = nullptr) {
VectorBlockGenerator Generator(B, GlobalMaps, VLTS, Stmt, Schedule, P, LI,
- SE);
+ SE, Build, ExprBuilder);
Generator.copyBB();
}
@@ -252,7 +256,9 @@ private:
VectorBlockGenerator(PollyIRBuilder &B, VectorValueMapT &GlobalMaps,
std::vector<LoopToScevMapT> &VLTS, ScopStmt &Stmt,
__isl_keep isl_map *Schedule, Pass *P, LoopInfo &LI,
- ScalarEvolution &SE);
+ ScalarEvolution &SE,
+ __isl_keep isl_ast_build *Build = nullptr,
+ IslExprBuilder *ExprBuilder = nullptr);
int getVectorWidth();
diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp
index 9a320fe2c05..44a6e01a19e 100644
--- a/polly/lib/CodeGen/BlockGenerators.cpp
+++ b/polly/lib/CodeGen/BlockGenerators.cpp
@@ -289,8 +289,9 @@ void BlockGenerator::copyBB(ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
VectorBlockGenerator::VectorBlockGenerator(
PollyIRBuilder &B, VectorValueMapT &GlobalMaps,
std::vector<LoopToScevMapT> &VLTS, ScopStmt &Stmt,
- __isl_keep isl_map *Schedule, Pass *P, LoopInfo &LI, ScalarEvolution &SE)
- : BlockGenerator(B, Stmt, P, LI, SE, nullptr, nullptr),
+ __isl_keep isl_map *Schedule, Pass *P, LoopInfo &LI, ScalarEvolution &SE,
+ __isl_keep isl_ast_build *Build, IslExprBuilder *ExprBuilder)
+ : BlockGenerator(B, Stmt, P, LI, SE, Build, ExprBuilder),
GlobalMaps(GlobalMaps), VLTS(VLTS), Schedule(Schedule) {
assert(GlobalMaps.size() > 1 && "Only one vector lane found");
assert(Schedule && "No statement domain provided");
@@ -338,8 +339,8 @@ VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;
Value *NewPointer = nullptr;
- NewPointer = getNewValue(Pointer, ScalarMaps[Offset], GlobalMaps[Offset],
- VLTS[Offset], getLoopForInst(Load));
+ NewPointer = generateLocationAccessed(Load, Pointer, ScalarMaps[Offset],
+ GlobalMaps[Offset], VLTS[Offset]);
Value *VectorPtr =
Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
LoadInst *VecLoad =
@@ -365,7 +366,7 @@ Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
const Value *Pointer = Load->getPointerOperand();
Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
Value *NewPointer =
- getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load));
+ generateLocationAccessed(Load, Pointer, BBMap, GlobalMaps[0], VLTS[0]);
Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
Load->getName() + "_p_vec_p");
LoadInst *ScalarLoad =
@@ -393,8 +394,8 @@ VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
Value *Vector = UndefValue::get(VectorType);
for (int i = 0; i < VectorWidth; i++) {
- Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
- VLTS[i], getLoopForInst(Load));
+ Value *NewPointer = generateLocationAccessed(Load, Pointer, ScalarMaps[i],
+ GlobalMaps[i], VLTS[i]);
Value *ScalarLoad =
Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
Vector = Builder.CreateInsertElement(
@@ -481,8 +482,8 @@ void VectorBlockGenerator::copyStore(const StoreInst *Store,
if (Access.isStrideOne(isl_map_copy(Schedule))) {
Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
- Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0],
- VLTS[0], getLoopForInst(Store));
+ Value *NewPointer = generateLocationAccessed(Store, Pointer, ScalarMaps[0],
+ GlobalMaps[0], VLTS[0]);
Value *VectorPtr =
Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
@@ -493,8 +494,8 @@ void VectorBlockGenerator::copyStore(const StoreInst *Store,
} else {
for (unsigned i = 0; i < ScalarMaps.size(); i++) {
Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
- Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
- VLTS[i], getLoopForInst(Store));
+ Value *NewPointer = generateLocationAccessed(
+ Store, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
Builder.CreateStore(Scalar, NewPointer);
}
}
@@ -549,8 +550,8 @@ void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
- copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane],
- VLTS[VectorLane]);
+ BlockGenerator::copyInstruction(Inst, ScalarMaps[VectorLane],
+ GlobalMaps[VectorLane], VLTS[VectorLane]);
if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
return;
diff --git a/polly/lib/CodeGen/IslCodeGeneration.cpp b/polly/lib/CodeGen/IslCodeGeneration.cpp
index d72298b54eb..004544ca103 100644
--- a/polly/lib/CodeGen/IslCodeGeneration.cpp
+++ b/polly/lib/CodeGen/IslCodeGeneration.cpp
@@ -248,7 +248,8 @@ void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User,
isl_map *S = isl_map_from_union_map(Schedule);
createSubstitutionsVector(Expr, Stmt, VectorMap, VLTS, IVS, IteratorID);
- VectorBlockGenerator::generate(Builder, *Stmt, VectorMap, VLTS, S, P, LI, SE);
+ VectorBlockGenerator::generate(Builder, *Stmt, VectorMap, VLTS, S, P, LI, SE,
+ IslAstInfo::getBuild(User), &ExprBuilder);
isl_map_free(S);
isl_id_free(Id);
diff --git a/polly/test/Isl/CodeGen/MemAccess/simple_analyze.ll b/polly/test/Isl/CodeGen/MemAccess/simple_analyze.ll
index 637eb8defc8..b8c92417c94 100644
--- a/polly/test/Isl/CodeGen/MemAccess/simple_analyze.ll
+++ b/polly/test/Isl/CodeGen/MemAccess/simple_analyze.ll
@@ -1,6 +1,6 @@
;RUN: opt %loadPolly -polly-import-jscop -analyze -polly-import-jscop-dir=%S -polly-import-jscop-postfix=transformed < %s | FileCheck %s
+;RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S -polly-import-jscop-postfix=transformed -polly-codegen-isl -polly-vectorizer=polly -S < %s | FileCheck %s --check-prefix=JSCOPVEC
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
-target triple = "i386-pc-linux-gnu"
@A = common global [100 x i32] zeroinitializer, align 4
@B = common global [100 x i32] zeroinitializer, align 4
@@ -45,3 +45,31 @@ for.end14: ; preds = %for.cond4
}
; CHECK-DAG: New access function '{ Stmt_for_body7[i0] -> MemRef_B[0] }'detected in JSCOP file
; CHECK-DAG: New access function '{ Stmt_for_body[i0] -> MemRef_A[0] }'detected in JSCOP file
+
+; Verify that the new access function (see above) is actually used during vector code generation.
+
+; JSCOPVEC: store i32 0, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0)
+; JSCOPVEC: store i32 1, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0)
+; JSCOPVEC: store i32 2, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0)
+; JSCOPVEC: store i32 3, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0)
+; JSCOPVEC: store i32 4, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0)
+; JSCOPVEC: store i32 5, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0)
+; JSCOPVEC: store i32 6, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0)
+; JSCOPVEC: store i32 7, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0)
+; JSCOPVEC: store i32 8, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0)
+; JSCOPVEC: store i32 9, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0)
+; JSCOPVEC: store i32 10, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0)
+; JSCOPVEC: store i32 11, i32* getelementptr inbounds ([100 x i32]* @B, i32 0, i32 0)
+
+; JSCOPVEC: store i32 0, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0)
+; JSCOPVEC: store i32 1, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0)
+; JSCOPVEC: store i32 2, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0)
+; JSCOPVEC: store i32 3, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0)
+; JSCOPVEC: store i32 4, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0)
+; JSCOPVEC: store i32 5, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0)
+; JSCOPVEC: store i32 6, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0)
+; JSCOPVEC: store i32 7, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0)
+; JSCOPVEC: store i32 8, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0)
+; JSCOPVEC: store i32 9, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0)
+; JSCOPVEC: store i32 10, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0)
+; JSCOPVEC: store i32 11, i32* getelementptr inbounds ([100 x i32]* @A, i32 0, i32 0)
OpenPOWER on IntegriCloud