diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize')
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 67 |
1 files changed, 49 insertions, 18 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d2504c5e41d..201e9e939b3 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1532,15 +1532,26 @@ public:
   calculateRegisterUsage(const SmallVector<unsigned, 8> &VFs);
 
 private:
+  /// The vectorization cost is a combination of the cost itself and a boolean
+  /// indicating whether any of the contributing operations will actually operate on
+  /// vector values after type legalization in the backend. If this latter value is
+  /// false, then all operations will be scalarized (i.e. no vectorization has
+  /// actually taken place).
+  typedef std::pair<unsigned, bool> VectorizationCostTy;
+
   /// Returns the expected execution cost. The unit of the cost does
   /// not matter because we use the 'cost' units to compare different
   /// vector widths. The cost that is returned is *not* normalized by
   /// the factor width.
-  unsigned expectedCost(unsigned VF);
+  VectorizationCostTy expectedCost(unsigned VF);
 
   /// Returns the execution time cost of an instruction for a given vector
   /// width. Vector width of one means scalar.
-  unsigned getInstructionCost(Instruction *I, unsigned VF);
+  VectorizationCostTy getInstructionCost(Instruction *I, unsigned VF);
+
+  /// The cost-computation logic from getInstructionCost which provides
+  /// the vector type as an output parameter.
+  unsigned getInstructionCost(Instruction *I, unsigned VF, Type *&VectorTy);
 
   /// Returns whether the instruction is a load or store and will be a emitted
   /// as a vector operation.
@@ -5145,7 +5156,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
     return Factor;
   }
 
-  float Cost = expectedCost(1);
+  float Cost = expectedCost(1).first;
 #ifndef NDEBUG
   const float ScalarCost = Cost;
 #endif /* NDEBUG */
@@ -5156,16 +5167,22 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
   // Ignore scalar width, because the user explicitly wants vectorization.
   if (ForceVectorization && VF > 1) {
     Width = 2;
-    Cost = expectedCost(Width) / (float)Width;
+    Cost = expectedCost(Width).first / (float)Width;
   }
 
   for (unsigned i=2; i <= VF; i*=2) {
     // Notice that the vector loop needs to be executed less times, so
     // we need to divide the cost of the vector loops by the width of
     // the vector elements.
-    float VectorCost = expectedCost(i) / (float)i;
+    VectorizationCostTy C = expectedCost(i);
+    float VectorCost = C.first / (float)i;
     DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " <<
           (int)VectorCost << ".\n");
+    if (!C.second && !ForceVectorization) {
+      DEBUG(dbgs() << "LV: Not considering vector loop of width " << i <<
+            " because it will not generate any vector instructions.\n");
+      continue;
+    }
     if (VectorCost < Cost) {
       Cost = VectorCost;
       Width = i;
@@ -5313,7 +5330,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
   // If we did not calculate the cost for VF (because the user selected the VF)
   // then we calculate the cost of VF here.
   if (LoopCost == 0)
-    LoopCost = expectedCost(VF);
+    LoopCost = expectedCost(VF).first;
 
   // Clamp the calculated IC to be between the 1 and the max interleave count
   // that the target allows.
@@ -5540,13 +5557,14 @@ LoopVectorizationCostModel::calculateRegisterUsage(
   return RUs;
 }
 
-unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
-  unsigned Cost = 0;
+LoopVectorizationCostModel::VectorizationCostTy
+LoopVectorizationCostModel::expectedCost(unsigned VF) {
+  VectorizationCostTy Cost;
 
   // For each block.
   for (Loop::block_iterator bb = TheLoop->block_begin(),
        be = TheLoop->block_end(); bb != be; ++bb) {
-    unsigned BlockCost = 0;
+    VectorizationCostTy BlockCost;
     BasicBlock *BB = *bb;
 
     // For each instruction in the old loop.
@@ -5559,24 +5577,26 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
       if (ValuesToIgnore.count(&*it))
         continue;
 
-      unsigned C = getInstructionCost(&*it, VF);
+      VectorizationCostTy C = getInstructionCost(&*it, VF);
 
       // Check if we should override the cost.
       if (ForceTargetInstructionCost.getNumOccurrences() > 0)
-        C = ForceTargetInstructionCost;
+        C.first = ForceTargetInstructionCost;
 
-      BlockCost += C;
-      DEBUG(dbgs() << "LV: Found an estimated cost of " << C << " for VF " <<
-            VF << " For instruction: " << *it << '\n');
+      BlockCost.first += C.first;
+      BlockCost.second |= C.second;
+      DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first <<
+            " for VF " << VF << " For instruction: " << *it << '\n');
     }
 
     // We assume that if-converted blocks have a 50% chance of being executed.
     // When the code is scalar then some of the blocks are avoided due to CF.
     // When the code is vectorized we execute all code paths.
     if (VF == 1 && Legal->blockNeedsPredication(*bb))
-      BlockCost /= 2;
+      BlockCost.first /= 2;
 
-    Cost += BlockCost;
+    Cost.first += BlockCost.first;
+    Cost.second |= BlockCost.second;
   }
 
   return Cost;
@@ -5653,17 +5673,28 @@ static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
          Legal->hasStride(I->getOperand(1));
 }
 
-unsigned
+LoopVectorizationCostModel::VectorizationCostTy
 LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
   // If we know that this instruction will remain uniform, check the cost of
   // the scalar version.
   if (Legal->isUniformAfterVectorization(I))
     VF = 1;
 
+  Type *VectorTy;
+  unsigned C = getInstructionCost(I, VF, VectorTy);
+
+  bool TypeNotScalarized = VF > 1 && !VectorTy->isVoidTy() &&
+                           TTI.getNumberOfParts(VectorTy) < VF;
+  return VectorizationCostTy(C, TypeNotScalarized);
+}
+
+unsigned
+LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF,
+                                               Type *&VectorTy) {
   Type *RetTy = I->getType();
   if (VF > 1 && MinBWs.count(I))
     RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
-  Type *VectorTy = ToVectorTy(RetTy, VF);
+  VectorTy = ToVectorTy(RetTy, VF);
 
   // TODO: We need to estimate the cost of intrinsic calls.
   switch (I->getOpcode()) {

