diff options
Diffstat (limited to 'llvm/lib/Target/ARM')
-rw-r--r-- | llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 7 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 3 |
2 files changed, 7 insertions, 3 deletions
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 10e6297ef1e..cc001b59678 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -338,14 +338,17 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); } -int ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { +int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, + const SCEV *Ptr) { // Address computations in vectorized code with non-consecutive addresses will // likely result in more instructions compared to scalar code where the // computation can more often be merged into the index mode. The resulting // extra micro-ops can significantly decrease throughput. unsigned NumVectorInstToHideOverhead = 10; + int MaxMergeDistance = 64; - if (Ty->isVectorTy() && IsComplex) + if (Ty->isVectorTy() && SE && + !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1)) return NumVectorInstToHideOverhead; // In many cases the address computation is not merged into the instruction diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index d83228afb0a..731a5adf3d7 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -104,7 +104,8 @@ public: int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); - int getAddressComputationCost(Type *Val, bool IsComplex); + int getAddressComputationCost(Type *Val, ScalarEvolution *SE, + const SCEV *Ptr); int getFPOpCost(Type *Ty); |