summary refs log tree commit diff stats
path: root/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
diff options
context:
space:
mode:
author Nadav Rotem <nrotem@apple.com> 2012-10-26 23:49:28 +0000
committer Nadav Rotem <nrotem@apple.com> 2012-10-26 23:49:28 +0000
commit afae78edabfdbe187c5f4d6d41bfe17003196b87 (patch)
tree 85d9c182fd1cd93b7d88c213a82fc223b173da33 /llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
parent 1f06e7f00e9b3f9e769f0e4b2c0020920af33ed3 (diff)
download bcm5719-llvm-afae78edabfdbe187c5f4d6d41bfe17003196b87.tar.gz
bcm5719-llvm-afae78edabfdbe187c5f4d6d41bfe17003196b87.zip
Refactor the VectorTargetTransformInfo interface.
Add getCostXXX-style calls (getCFInstrCost, getArithmeticInstrCost, getCmpSelInstrCost, getCastInstrCost) for different families of opcodes, such as casts, arithmetic, cmp, etc. Port the LoopVectorizer to the new API. The LoopVectorizer now finds instructions which will remain uniform after vectorization. It uses this information when calculating the cost of these instructions.
llvm-svn: 166836
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 61
1 files changed, 52 insertions, 9 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e47baf89083..1773812da24 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -108,7 +108,7 @@ public:
createEmptyLoop(Legal);
/// Widen each instruction in the old loop to a new one in the new loop.
/// Use the Legality module to find the induction and reduction variables.
- vectorizeLoop(Legal);
+ vectorizeLoop(Legal);
// register the new loop.
cleanup();
}
@@ -254,6 +254,9 @@ public:
/// This check allows us to vectorize A[idx] into a wide load/store.
bool isConsecutiveGep(Value *Ptr);
+ /// Returns true if \p I is in the Uniforms set, i.e. it was recorded as remaining uniform (scalar) after vectorization.
+ bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);}
+
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
@@ -291,6 +294,9 @@ private:
/// Allowed outside users. This holds the reduction
/// vars which can be accessed from outside the loop.
SmallPtrSet<Value*, 4> AllowedExit;
+ /// This set holds the variables which are known to be uniform after
+ /// vectorization.
+ SmallPtrSet<Instruction*, 4> Uniforms;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -1177,9 +1183,40 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
return false;
}
- // If the memory dependencies do not prevent us from
- // vectorizing, then vectorize.
- return canVectorizeMemory(BB);
+ // Don't vectorize if the memory dependencies do not allow vectorization.
+ if (!canVectorizeMemory(BB))
+ return false;
+
+ // We now know that the loop is vectorizable!
+ // Collect variables that will remain uniform after vectorization.
+ std::vector<Value*> Worklist;
+
+ // Start with the conditional branch and walk up the block.
+ Worklist.push_back(BB.getTerminator()->getOperand(0));
+
+ while (Worklist.size()) {
+ Instruction *I = dyn_cast<Instruction>(Worklist.back());
+ Worklist.pop_back();
+ // Look at instructions inside this block.
+ if (!I) continue;
+ if (I->getParent() != &BB) continue;
+
+ // Stop when reaching PHI nodes.
+ if (isa<PHINode>(I)) {
+ assert(I == Induction && "Found a uniform PHI that is not the induction");
+ break;
+ }
+
+ // This is a known uniform.
+ Uniforms.insert(I);
+
+ // Insert all operands.
+ for (int i=0, Op = I->getNumOperands(); i < Op; ++i) {
+ Worklist.push_back(I->getOperand(i));
+ }
+ }
+
+ return true;
}
bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
@@ -1484,9 +1521,15 @@ unsigned
LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
assert(VTTI && "Invalid vector target transformation info");
+ // If we know that this instruction will remain uniform, check the cost of
+ // the scalar version.
+ if (Legal->isUniformAfterVectorization(I))
+ VF = 1;
+
Type *RetTy = I->getType();
Type *VectorTy = ToVectorTy(RetTy, VF);
+
// TODO: We need to estimate the cost of intrinsic calls.
switch (I->getOpcode()) {
case Instruction::GetElementPtr:
@@ -1495,7 +1538,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// generate vector geps.
return 0;
case Instruction::Br: {
- return VTTI->getInstrCost(I->getOpcode());
+ return VTTI->getCFInstrCost(I->getOpcode());
}
case Instruction::PHI:
return 0;
@@ -1517,7 +1560,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- return VTTI->getInstrCost(I->getOpcode(), VectorTy);
+ return VTTI->getArithmeticInstrCost(I->getOpcode(), VectorTy);
}
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
@@ -1527,13 +1570,13 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
if (ScalarCond)
CondTy = VectorType::get(CondTy, VF);
- return VTTI->getInstrCost(I->getOpcode(), VectorTy, CondTy);
+ return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
}
case Instruction::ICmp:
case Instruction::FCmp: {
Type *ValTy = I->getOperand(0)->getType();
VectorTy = ToVectorTy(ValTy, VF);
- return VTTI->getInstrCost(I->getOpcode(), VectorTy);
+ return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy);
}
case Instruction::Store: {
StoreInst *SI = cast<StoreInst>(I);
@@ -1602,7 +1645,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
case Instruction::FPTrunc:
case Instruction::BitCast: {
Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
- return VTTI->getInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
+ return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
}
default: {
// We are scalarizing the instruction. Return the cost of the scalar
OpenPOWER on IntegriCloud