summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
diff options
context:
space:
mode:
authorMohammed Agabaria <mohammed.agabaria@intel.com>2017-01-05 14:03:41 +0000
committerMohammed Agabaria <mohammed.agabaria@intel.com>2017-01-05 14:03:41 +0000
commit23599ba7940d9891ed5eb982aaaed116f97aea74 (patch)
tree910698abcc007239ab93b2f167fcd7ff96a75603 /llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
parenta983e7c4a415bc28b8bc6218f4881d11b3a2d995 (diff)
downloadbcm5719-llvm-23599ba7940d9891ed5eb982aaaed116f97aea74.tar.gz
bcm5719-llvm-23599ba7940d9891ed5eb982aaaed116f97aea74.zip
Currently, isLikelyComplexAddressComputation tries to figure out whether the given stride seems to be 'complex' and needs some extra cost for address computation handling.
This code seems to be target dependent, and the right answer may not be the same for all targets. This change passes the decision of whether the given stride is complex to the target by sending the stride information via SCEV to getAddressComputationCost instead of 'IsComplex'. Specifically, on X86 targets we don't see any significant address computation cost for strided accesses in general. Differential Revision: https://reviews.llvm.org/D27518 llvm-svn: 291106
Diffstat (limited to 'llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp')
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp7
1 files changed, 5 insertions, 2 deletions
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 10e6297ef1e..cc001b59678 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -338,14 +338,17 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-int ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
+ const SCEV *Ptr) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;
+ int MaxMergeDistance = 64;
- if (Ty->isVectorTy() && IsComplex)
+ if (Ty->isVectorTy() && SE &&
+ !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
// In many cases the address computation is not merged into the instruction
OpenPOWER on IntegriCloud