diff options
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 38 |
1 files changed, 25 insertions, 13 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index ec05812768f..9bffb1da4cd 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -342,6 +342,14 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL, unsigned VF) { return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty); } +/// A helper function that returns the reciprocal of the block probability of +/// predicated blocks. If we return X, we are assuming the predicated block +/// will execute once for for every X iterations of the loop header. +/// +/// TODO: We should use actual block probability here, if available. Currently, +/// we always assume predicated blocks have a 50% chance of executing. +static unsigned getReciprocalPredBlockProb() { return 2; } + /// InnerLoopVectorizer vectorizes loops which contain only one basic /// block to a specified vectorization factor (VF). /// This class performs the widening of scalars into vectors, or multiple @@ -3554,12 +3562,11 @@ static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract, } } - // We assume that if-converted blocks have a 50% chance of being executed. - // Predicated scalarized instructions are avoided due to the CF that bypasses - // turned off lanes. The extracts and inserts will be sinked/hoisted to the - // predicated basic-block and are subjected to the same assumption. + // If we have a predicated instruction, it may not be executed for each + // vector lane. Scale the cost by the probability of executing the + // predicated block. if (Predicated) - Cost /= 2; + Cost /= getReciprocalPredBlockProb(); return Cost; } @@ -6397,11 +6404,14 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) { << VF << " For instruction: " << I << '\n'); } - // We assume that if-converted blocks have a 50% chance of being executed. - // When the code is scalar then some of the blocks are avoided due to CF. - // When the code is vectorized we execute all code paths. + // If we are vectorizing a predicated block, it will have been + // if-converted. This means that the block's instructions (aside from + // stores and instructions that may divide by zero) will now be + // unconditionally executed. For the scalar case, we may not always execute + // the predicated block. Thus, scale the block's cost by the probability of + // executing it. if (VF == 1 && Legal->blockNeedsPredication(BB)) - BlockCost.first /= 2; + BlockCost.first /= getReciprocalPredBlockProb(); Cost.first += BlockCost.first; Cost.second |= BlockCost.second; @@ -6518,11 +6528,13 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, case Instruction::SDiv: case Instruction::URem: case Instruction::SRem: - // We assume that if-converted blocks have a 50% chance of being executed. - // Predicated scalarized instructions are avoided due to the CF that - // bypasses turned off lanes. If we are not predicating, fallthrough. + // If we have a predicated instruction, it may not be executed for each + // vector lane. Get the scalarization cost and scale this amount by the + // probability of executing the predicated block. If the instruction is not + // predicated, we fall through to the next case. if (VF > 1 && Legal->isScalarWithPredication(I)) - return VF * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy) / 2 + + return VF * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy) / + getReciprocalPredBlockProb() + getScalarizationOverhead(I, VF, true, TTI); case Instruction::Add: case Instruction::FAdd: |