Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/TargetTransformInfo.cpp                |  5
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp   |  7
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h     |  2
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp           |  7
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetTransformInfo.h             |  3
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.cpp           | 20
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.h             |  3
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp          | 59
8 files changed, 51 insertions(+), 55 deletions(-)
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 2a15b9b264e..cd8c24630df 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -389,8 +389,9 @@ unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
}
int TargetTransformInfo::getAddressComputationCost(Type *Tp,
- bool IsComplex) const {
- int Cost = TTIImpl->getAddressComputationCost(Tp, IsComplex);
+ ScalarEvolution *SE,
+ const SCEV *Ptr) const {
+ int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
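With the new signature, callers hand the target the pointer's SCEV rather than a precomputed "is complex" flag, so each target can inspect the access pattern itself. A minimal sketch of the call pattern (mirroring the LoopVectorize change at the end of this patch; the variable names here are illustrative):

    // Sketch only: pass the pointer SCEV (or nullptr when the pattern is not
    // analyzable) and let the target derive the address-computation cost.
    const SCEV *PtrSCEV = SE->getSCEV(Ptr);
    int AddrCost = TTI.getAddressComputationCost(VecPtrTy, SE, PtrSCEV);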
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 88c98865bbc..1a17691fc58 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -417,14 +417,17 @@ int AArch64TTIImpl::getArithmeticInstrCost(
}
}
-int AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
+ const SCEV *Ptr) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;
+ int MaxMergeDistance = 64;
- if (Ty->isVectorTy() && IsComplex)
+ if (Ty->isVectorTy() && SE &&
+ !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
// In many cases the address computation is not merged into the instruction
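BaseT::isConstantStridedAccessLessThan is not shown in this patch (it lives in the shared TTI implementation); judging from the stride check this change removes from LoopVectorize below, it is expected to report whether the pointer is an add-recurrence with a compile-time-constant step below the given merge distance, roughly along these lines:

    // Sketch only, assuming the helper mirrors the stride check removed from
    // LoopVectorize further down in this patch.
    bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                         int64_t MergeDistance) {
      const auto *AddRec = dyn_cast_or_null<SCEVAddRecExpr>(Ptr);
      if (!AddRec)
        return false; // not a strided access at all
      const auto *Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
      if (!Step)
        return false; // stride not known at compile time
      const APInt &StepVal = Step->getAPInt();
      if (StepVal.getBitWidth() > 64)
        return false; // huge step value - give up
      return StepVal.getSExtValue() < MergeDistance;
    }

Under this reading, AArch64 and ARM only charge the extra overhead for vectorized accesses whose stride is not provably a small constant (at most 64).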
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 24642cb1698..849fd3d9b44 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -104,7 +104,7 @@ public:
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
- int getAddressComputationCost(Type *Ty, bool IsComplex);
+ int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 10e6297ef1e..cc001b59678 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -338,14 +338,17 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-int ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
+ const SCEV *Ptr) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;
+ int MaxMergeDistance = 64;
- if (Ty->isVectorTy() && IsComplex)
+ if (Ty->isVectorTy() && SE &&
+ !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
// In many cases the address computation is not merged into the instruction
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index d83228afb0a..731a5adf3d7 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -104,7 +104,8 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- int getAddressComputationCost(Type *Val, bool IsComplex);
+ int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
+ const SCEV *Ptr);
int getFPOpCost(Type *Ty);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 84ec14285d9..c2f542fd4b1 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1626,17 +1626,29 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
return Cost+LT.first;
}
-int X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
+ const SCEV *Ptr) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;
- if (Ty->isVectorTy() && IsComplex)
- return NumVectorInstToHideOverhead;
+ // Cost modeling of Strided Access Computation is hidden by the indexing
+ // modes of X86 regardless of the stride value. We don't believe that there
+ // is a difference between a constant strided access in general and one
+ // whose stride is less than or equal to 64.
+ // Even in the case of a (loop-invariant) stride whose value is not known at
+ // compile time, the address computation will not incur more than one extra
+ // ADD instruction.
+ if (Ty->isVectorTy() && SE) {
+ if (!BaseT::isStridedAccess(Ptr))
+ return NumVectorInstToHideOverhead;
+ if (!BaseT::getConstantStrideStep(SE, Ptr))
+ return 1;
+ }
- return BaseT::getAddressComputationCost(Ty, IsComplex);
+ return BaseT::getAddressComputationCost(Ty, SE, Ptr);
}
int X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
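To make the new X86 behaviour concrete, the cases break down as follows for a vector access (illustrative summary, not part of the patch):

    // Classification by the pointer SCEV reaching getAddressComputationCost:
    //   nullptr or not an add-recurrence (e.g. a gather of A[B[i]])
    //       -> NumVectorInstToHideOverhead (10)
    //   {A,+,m} with a loop-invariant, run-time stride m
    //       -> 1 (at most one extra ADD)
    //   {A,+,16} with a compile-time-constant stride
    //       -> falls through to BaseT::getAddressComputationCost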
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index f6bcb9f569e..c013805f432 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -71,7 +71,8 @@ public:
unsigned AddressSpace);
int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
bool VariableMask, unsigned Alignment);
- int getAddressComputationCost(Type *PtrTy, bool IsComplex);
+ int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
+ const SCEV *Ptr);
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8cde0c4cd60..31daba2248a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6785,22 +6785,19 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
return Cost;
}
-/// \brief Check whether the address computation for a non-consecutive memory
-/// access looks like an unlikely candidate for being merged into the indexing
-/// mode.
+/// \brief Gets the address access SCEV after verifying that the access pattern
+/// is loop invariant except for the induction variable dependence.
///
-/// We look for a GEP which has one index that is an induction variable and all
-/// other indices are loop invariant. If the stride of this access is also
-/// within a small bound we decide that this address computation can likely be
-/// merged into the addressing mode.
-/// In all other cases, we identify the address computation as complex.
-static bool isLikelyComplexAddressComputation(Value *Ptr,
- LoopVectorizationLegality *Legal,
- ScalarEvolution *SE,
- const Loop *TheLoop) {
+/// This SCEV can be sent to the Target in order to estimate the address
+/// calculation cost.
+static const SCEV *getAddressAccessSCEV(
+ Value *Ptr,
+ LoopVectorizationLegality *Legal,
+ ScalarEvolution *SE,
+ const Loop *TheLoop) {
auto *Gep = dyn_cast<GetElementPtrInst>(Ptr);
if (!Gep)
- return true;
+ return nullptr;
// We are looking for a gep with all loop invariant indices except for one
// which should be an induction variable.
@@ -6809,33 +6806,11 @@ static bool isLikelyComplexAddressComputation(Value *Ptr,
Value *Opd = Gep->getOperand(i);
if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
!Legal->isInductionVariable(Opd))
- return true;
+ return nullptr;
}
- // Now we know we have a GEP ptr, %inv, %ind, %inv. Make sure that the step
- // can likely be merged into the address computation.
- unsigned MaxMergeDistance = 64;
-
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Ptr));
- if (!AddRec)
- return true;
-
- // Check the step is constant.
- const SCEV *Step = AddRec->getStepRecurrence(*SE);
- // Calculate the pointer stride and check if it is consecutive.
- const auto *C = dyn_cast<SCEVConstant>(Step);
- if (!C)
- return true;
-
- const APInt &APStepVal = C->getAPInt();
-
- // Huge step value - give up.
- if (APStepVal.getBitWidth() > 64)
- return true;
-
- int64_t StepVal = APStepVal.getSExtValue();
-
- return StepVal > MaxMergeDistance;
+ // Now we know we have a GEP ptr, %inv, %ind, %inv. Return the pointer SCEV.
+ return SE->getSCEV(Ptr);
}
static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
@@ -7063,12 +7038,12 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
unsigned Cost = 0;
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
- // True if the memory instruction's address computation is complex.
- bool IsComplexComputation =
- isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
+ // Figure out whether the access is strided and get the stride value,
+ // if it's known at compile time.
+ const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, SE, TheLoop);
// Get the cost of the scalar memory instruction and address computation.
- Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
+ Cost += VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
Cost += VF *
TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
Alignment, AS);
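As a rough worked example (numbers assumed for illustration): when this scalarized path is costed at VF = 4 on AArch64 or ARM and the stride cannot be proven to be a small constant, the address-computation term alone contributes

    Cost += VF * getAddressComputationCost(PtrTy, SE, PtrSCEV)
          = 4  * 10 = 40

on top of the four scalar memory operations, whereas a provably small constant stride falls back to the target's default address-computation cost.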