diff options
| author | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-02-08 14:50:48 +0000 |
|---|---|---|
| committer | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-02-08 14:50:48 +0000 |
| commit | 594fa2dc2ba2f5748f294c25fe309b03d7eaf700 (patch) | |
| tree | 6fe16384da8197bf293ad2d55f0a81bd6a1c7fe9 /llvm/lib | |
| parent | 897f2cf408c9522187b3a375e201bdf6938f404f (diff) | |
| download | bcm5719-llvm-594fa2dc2ba2f5748f294c25fe309b03d7eaf700.tar.gz bcm5719-llvm-594fa2dc2ba2f5748f294c25fe309b03d7eaf700.zip | |
ARM cost model: Address computation in vector mem ops not free
Adds a function to target transform info to query for the cost of address
computation. The cost model analysis pass now also queries this interface.
The code in LoopVectorize adds the cost of address computation as part of the
memory instruction cost calculation. Only there, we know whether the instruction
will be scalarized or not.
Increase the penality for inserting in to D registers on swift. This becomes
necessary because we now always assume that address computation has a cost and
three is a closer value to the architecture.
radar://13097204
llvm-svn: 174713
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Analysis/CostModel.cpp | 5 | ||||
| -rw-r--r-- | llvm/lib/Analysis/TargetTransformInfo.cpp | 7 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/BasicTargetTransformInfo.cpp | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 13 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 22 |
5 files changed, 42 insertions, 10 deletions
diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp index 1784512bce7..8435e397077 100644 --- a/llvm/lib/Analysis/CostModel.cpp +++ b/llvm/lib/Analysis/CostModel.cpp @@ -85,6 +85,11 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { return -1; switch (I->getOpcode()) { + case Instruction::GetElementPtr:{ + Type *ValTy = I->getOperand(0)->getType()->getPointerElementType(); + return TTI->getAddressComputationCost(ValTy); + } + case Instruction::Ret: case Instruction::PHI: case Instruction::Br: { diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 9fc21fdb923..72421a00c76 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -196,6 +196,9 @@ unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { return PrevTTI->getNumberOfParts(Tp); } +unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp) const { + return PrevTTI->getAddressComputationCost(Tp); +} namespace { @@ -535,6 +538,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { unsigned getNumberOfParts(Type *Tp) const { return 0; } + + unsigned getAddressComputationCost(Type *Tp) const { + return 0; + } }; } // end anonymous namespace diff --git a/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp index ea5e93747dd..e8b5b4fe8d1 100644 --- a/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -101,6 +101,7 @@ public: virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, ArrayRef<Type*> Tys) const; virtual unsigned getNumberOfParts(Type *Tp) const; + virtual unsigned getAddressComputationCost(Type *Ty) const; /// @} }; @@ -400,3 +401,7 @@ unsigned BasicTTI::getNumberOfParts(Type *Tp) const { std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); return LT.first; } + +unsigned BasicTTI::getAddressComputationCost(Type *Ty) const { + return 0; +} diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 1f91e0ee362..f6fa3199709 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -120,6 +120,8 @@ public: unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const; unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const; + + unsigned getAddressComputationCost(Type *Val) const; /// @} }; @@ -304,12 +306,13 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index) const { - // Penalize inserting into an D-subregister. + // Penalize inserting into an D-subregister. We end up with a three times + // lower estimated throughput on swift. if (ST->isSwift() && Opcode == Instruction::InsertElement && ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32) - return 2; + return 3; return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index); } @@ -326,3 +329,9 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); } + +unsigned ARMTTI::getAddressComputationCost(Type *Ty) const { + // In many cases the address computation is not merged into the instruction + // addressing mode. + return 1; +} diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 91d565976ad..f12b0bf0f39 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3056,9 +3056,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { // TODO: We need to estimate the cost of intrinsic calls. switch (I->getOpcode()) { case Instruction::GetElementPtr: - // We mark this instruction as zero-cost because scalar GEPs are usually - // lowered to the intruction addressing mode. At the moment we don't - // generate vector geps. + // We mark this instruction as zero-cost because the cost of GEPs in + // vectorized code depends on whether the corresponding memory instruction + // is scalarized or not. Therefore, we handle GEPs with the memory + // instruction cost. return 0; case Instruction::Br: { return TTI.getCFInstrCost(I->getOpcode()); @@ -3113,9 +3114,12 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { unsigned AS = SI ? SI->getPointerAddressSpace() : LI->getPointerAddressSpace(); Value *Ptr = SI ? SI->getPointerOperand() : LI->getPointerOperand(); - + // We add the cost of address computation here instead of with the gep + // instruction because only here we know whether the operation is + // scalarized. if (VF == 1) - return TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); + return TTI.getAddressComputationCost(VectorTy) + + TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); // Scalarized loads/stores. int Stride = Legal->isConsecutivePtr(Ptr); @@ -3135,15 +3139,17 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { VectorTy, i); } - // The cost of the scalar stores. + // The cost of the scalar loads/stores. + Cost += VF * TTI.getAddressComputationCost(ValTy->getScalarType()); Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment, AS); return Cost; } // Wide load/stores. - unsigned Cost = TTI.getMemoryOpCost(I->getOpcode(), VectorTy, - Alignment, AS); + unsigned Cost = TTI.getAddressComputationCost(VectorTy); + Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); + if (Reverse) Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0); |

