| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-07-18 13:53:55 +0000 | 
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-07-18 13:53:55 +0000 | 
| commit | 2b37ddce4bd37ff70caa39fccc0fa0545d45ee12 (patch) | |
| tree | abf847b93ac27efe1c9997139a30c2ac45a84ea4 /llvm | |
| parent | 43d64b0b36c4bdc568c3a488df48a3a431da7a5c (diff) | |
[SLPVectorizer] Avoid duplicate scalar cost calculations in BoUpSLP::getEntryCost. NFCI.
Pulled out from D49225: we have a lot of repeated scalar cost calculations, often with arguments that don't look the same but turn out to be identical.
llvm-svn: 337390
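
The pattern the patch applies is purely mechanical: several cases in `BoUpSLP::getEntryCost` queried TTI for the same per-scalar cost twice, once for the reuse-shuffle adjustment and once for the scalar cost, sometimes spelling the arguments differently (e.g. `VL0->getOpcode()` vs `S.getOpcode()`, `VL0->getType()` vs `ScalarTy`) even though they evaluate to the same values. The sketch below is not LLVM code; `getScalarEltCost` and the surrounding names are hypothetical stand-ins that only illustrate the before/after shape of the refactor (the vector-cost side is left out of the sketch).

```cpp
// Not LLVM code: a self-contained sketch of the refactor. getScalarEltCost()
// stands in for the TTI->get*InstrCost() queries used in getEntryCost().
#include <cassert>

static int getScalarEltCost(unsigned Opcode) {
  return static_cast<int>(Opcode % 3) + 1; // toy cost model
}

// Before: the per-element scalar cost is queried twice per case, once for the
// reuse-shuffle adjustment and once for the scalar cost itself.
static int scalarPortionBefore(unsigned Opcode, int NumElts,
                               int ReuseShuffleNumbers, bool NeedToShuffleReuses) {
  int ReuseShuffleCost = 0;
  if (NeedToShuffleReuses)
    ReuseShuffleCost -= (ReuseShuffleNumbers - NumElts) * getScalarEltCost(Opcode);
  int ScalarCost = NumElts * getScalarEltCost(Opcode); // duplicate query
  return ReuseShuffleCost - ScalarCost;
}

// After: the cost is queried once, stored in ScalarEltCost, and reused.
static int scalarPortionAfter(unsigned Opcode, int NumElts,
                              int ReuseShuffleNumbers, bool NeedToShuffleReuses) {
  int ScalarEltCost = getScalarEltCost(Opcode); // single query
  int ReuseShuffleCost = 0;
  if (NeedToShuffleReuses)
    ReuseShuffleCost -= (ReuseShuffleNumbers - NumElts) * ScalarEltCost;
  int ScalarCost = NumElts * ScalarEltCost;
  return ReuseShuffleCost - ScalarCost;
}

int main() {
  // The two versions agree for any inputs; only the number of queries differs.
  assert(scalarPortionBefore(13, 4, 6, true) == scalarPortionAfter(13, 4, 6, true));
  return 0;
}
```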
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 87 | 
1 file changed, 37 insertions(+), 50 deletions(-)
```diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cc4fd0f764e..ac8c4f046c6 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2191,22 +2191,21 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
     case Instruction::FPTrunc:
     case Instruction::BitCast: {
       Type *SrcTy = VL0->getOperand(0)->getType();
+      int ScalarEltCost =
+          TTI->getCastInstrCost(S.getOpcode(), ScalarTy, SrcTy, VL0);
       if (NeedToShuffleReuses) {
-        ReuseShuffleCost -=
-            (ReuseShuffleNumbers - VL.size()) *
-            TTI->getCastInstrCost(S.getOpcode(), ScalarTy, SrcTy, VL0);
+        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }

       // Calculate the cost of this instruction.
-      int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
-                                                         VL0->getType(), SrcTy, VL0);
+      int ScalarCost = VL.size() * ScalarEltCost;

       VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
       int VecCost = 0;
       // Check if the values are candidates to demote.
       if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
         VecCost = ReuseShuffleCost +
-                  TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy, VL0);
+                  TTI->getCastInstrCost(S.getOpcode(), VecTy, SrcVecTy, VL0);
       }
       return VecCost - ScalarCost;
     }
@@ -2214,15 +2213,13 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
     case Instruction::ICmp:
     case Instruction::Select: {
       // Calculate the cost of this instruction.
+      int ScalarEltCost = TTI->getCmpSelInstrCost(S.getOpcode(), ScalarTy,
+                                                  Builder.getInt1Ty(), VL0);
       if (NeedToShuffleReuses) {
-        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) *
-                            TTI->getCmpSelInstrCost(S.getOpcode(), ScalarTy,
-                                                    Builder.getInt1Ty(), VL0);
+        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
       VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
-      int ScalarCost = VecTy->getNumElements() *
-                       TTI->getCmpSelInstrCost(S.getOpcode(), ScalarTy,
-                                               Builder.getInt1Ty(), VL0);
+      int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
       int VecCost = TTI->getCmpSelInstrCost(S.getOpcode(), VecTy, MaskTy, VL0);
       return ReuseShuffleCost + VecCost - ScalarCost;
     }
@@ -2281,16 +2278,12 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
       }

       SmallVector<const Value *, 4> Operands(VL0->operand_values());
+      int ScalarEltCost = TTI->getArithmeticInstrCost(
+          S.getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands);
       if (NeedToShuffleReuses) {
-        ReuseShuffleCost -=
-            (ReuseShuffleNumbers - VL.size()) *
-            TTI->getArithmeticInstrCost(S.getOpcode(), ScalarTy, Op1VK, Op2VK,
-                                        Op1VP, Op2VP, Operands);
-      }
-      int ScalarCost =
-          VecTy->getNumElements() *
-          TTI->getArithmeticInstrCost(S.getOpcode(), ScalarTy, Op1VK, Op2VK,
-                                      Op1VP, Op2VP, Operands);
+        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+      }
+      int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
       int VecCost = TTI->getArithmeticInstrCost(S.getOpcode(), VecTy, Op1VK,
                                                 Op2VK, Op1VP, Op2VP, Operands);
       return ReuseShuffleCost + VecCost - ScalarCost;
@@ -2301,31 +2294,27 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
       TargetTransformInfo::OperandValueKind Op2VK =
           TargetTransformInfo::OK_UniformConstantValue;

+      int ScalarEltCost =
+          TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK);
       if (NeedToShuffleReuses) {
-        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) *
-                            TTI->getArithmeticInstrCost(Instruction::Add,
-                                                        ScalarTy, Op1VK, Op2VK);
+        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
-      int ScalarCost =
-          VecTy->getNumElements() *
-          TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK);
+      int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
       int VecCost =
           TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK);
-
       return ReuseShuffleCost + VecCost - ScalarCost;
     }
     case Instruction::Load: {
       // Cost of wide load - cost of scalar loads.
       unsigned alignment = cast<LoadInst>(VL0)->getAlignment();
+      int ScalarEltCost =
+          TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0);
       if (NeedToShuffleReuses) {
-        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) *
-                            TTI->getMemoryOpCost(Instruction::Load, ScalarTy,
-                                                 alignment, 0, VL0);
+        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
-      int ScalarLdCost = VecTy->getNumElements() *
-          TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0);
-      int VecLdCost = TTI->getMemoryOpCost(Instruction::Load,
-                                           VecTy, alignment, 0, VL0);
+      int ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
+      int VecLdCost =
+          TTI->getMemoryOpCost(Instruction::Load, VecTy, alignment, 0, VL0);
       if (!E->ReorderIndices.empty()) {
         // TODO: Merge this shuffle with the ReuseShuffleCost.
         VecLdCost += TTI->getShuffleCost(
@@ -2336,15 +2325,14 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
     case Instruction::Store: {
       // We know that we can merge the stores. Calculate the cost.
       unsigned alignment = cast<StoreInst>(VL0)->getAlignment();
+      int ScalarEltCost =
+          TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0, VL0);
       if (NeedToShuffleReuses) {
-        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) *
-                            TTI->getMemoryOpCost(Instruction::Store, ScalarTy,
-                                                 alignment, 0, VL0);
+        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
-      int ScalarStCost = VecTy->getNumElements() *
-          TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0, VL0);
-      int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
-                                           VecTy, alignment, 0, VL0);
+      int ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
+      int VecStCost =
+          TTI->getMemoryOpCost(Instruction::Store, VecTy, alignment, 0, VL0);
       return ReuseShuffleCost + VecStCost - ScalarStCost;
     }
     case Instruction::Call: {
@@ -2352,21 +2340,20 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
       Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);

       // Calculate the cost of the scalar and vector calls.
-      SmallVector<Type*, 4> ScalarTys;
-      for (unsigned op = 0, opc = CI->getNumArgOperands(); op!= opc; ++op)
+      SmallVector<Type *, 4> ScalarTys;
+      for (unsigned op = 0, opc = CI->getNumArgOperands(); op != opc; ++op)
         ScalarTys.push_back(CI->getArgOperand(op)->getType());

       FastMathFlags FMF;
       if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
         FMF = FPMO->getFastMathFlags();

+      int ScalarEltCost =
+          TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF);
       if (NeedToShuffleReuses) {
-        ReuseShuffleCost -=
-            (ReuseShuffleNumbers - VL.size()) *
-            TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF);
+        ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
-      int ScalarCallCost = VecTy->getNumElements() *
-          TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF);
+      int ScalarCallCost = VecTy->getNumElements() * ScalarEltCost;

       SmallVector<Value *, 4> Args(CI->arg_operands());
       int VecCallCost = TTI->getIntrinsicInstrCost(ID, CI->getType(), Args, FMF,
@@ -2398,7 +2385,6 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
               I, TargetTransformInfo::TCK_RecipThroughput);
         }
       }
-      int VecCost = 0;
       for (Value *i : VL) {
         Instruction *I = cast<Instruction>(i);
         assert(S.isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
@@ -2407,6 +2393,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
       }
       // VecCost is equal to sum of the cost of creating 2 vectors
       // and the cost of creating shuffle.
+      int VecCost = 0;
       if (Instruction::isBinaryOp(S.getOpcode())) {
         VecCost = TTI->getArithmeticInstrCost(S.getOpcode(), VecTy);
         VecCost += TTI->getArithmeticInstrCost(S.getAltOpcode(), VecTy);
```
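
As a worked illustration of the per-case bookkeeping above: each case returns the vector cost minus the summed scalar cost, and `ReuseShuffleCost` (computed earlier in `getEntryCost` as a shuffle cost when the entry's scalars are reused across extra lanes) is reduced by the per-element scalar cost of those extra lanes, exactly the adjustment the patch factors through `ScalarEltCost`. The numbers below are made up, not real TargetTransformInfo values, and mirror the `Instruction::Load` case.

```cpp
// Worked example with made-up cost numbers (not real TargetTransformInfo
// values), mirroring the Instruction::Load case in the diff above.
#include <cstdio>

int main() {
  // Assumed inputs for a tree entry of 4 scalar loads reused across 6 lanes.
  const int ScalarEltCost = 1;       // assumed cost of one scalar load
  const int WideLoadCost = 1;        // assumed cost of the single wide load
  const int NumElts = 4;             // VecTy->getNumElements()
  const int ReuseShuffleNumbers = 6; // lanes produced from the 4 scalars
  const int ReuseShuffleBase = 3;    // assumed cost of the reuse shuffle itself

  // The per-case adjustment: subtract the scalar cost of the extra reused
  // lanes from the shuffle cost computed earlier.
  int ReuseShuffleCost = ReuseShuffleBase;
  ReuseShuffleCost -= (ReuseShuffleNumbers - NumElts) * ScalarEltCost; // 3 - 2 = 1

  int ScalarLdCost = NumElts * ScalarEltCost;                  // 4
  int VecLdCost = WideLoadCost;                                // 1
  int EntryCost = ReuseShuffleCost + VecLdCost - ScalarLdCost; // 1 + 1 - 4 = -2

  // A negative entry cost means the vector form looks cheaper than the scalars.
  std::printf("entry cost = %d\n", EntryCost);
  return 0;
}
```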

