diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Analysis/TargetTransformInfo.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/R600/AMDGPUTargetTransformInfo.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.h | 2 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 |
11 files changed, 18 insertions, 12 deletions
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index a1519de25ee..e1744d1f296 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -186,8 +186,8 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { return TTIImpl->getRegisterBitWidth(Vector); } -unsigned TargetTransformInfo::getMaxInterleaveFactor() const { - return TTIImpl->getMaxInterleaveFactor(); +unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const { + return TTIImpl->getMaxInterleaveFactor(VF); } unsigned TargetTransformInfo::getArithmeticInstrCost( diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 0533355b01d..ed27cf84bbb 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -419,7 +419,7 @@ unsigned AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return Cost; } -unsigned AArch64TTIImpl::getMaxInterleaveFactor() { +unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) { if (ST->isCortexA57()) return 4; return 2; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index dd3fd1f5ab7..25c22bcd58e 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -110,7 +110,7 @@ public: return 64; } - unsigned getMaxInterleaveFactor(); + unsigned getMaxInterleaveFactor(unsigned VF); unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 97590f60893..9479d7693eb 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -96,7 +96,7 @@ public: return 32; } - unsigned getMaxInterleaveFactor() { + unsigned getMaxInterleaveFactor(unsigned VF) { // These are out of order CPUs: if (ST->isCortexA15() || ST->isSwift()) return 2; diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index b46acd47f31..08328d9acac 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -215,7 +215,7 @@ unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) { } -unsigned PPCTTIImpl::getMaxInterleaveFactor() { +unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) { unsigned Directive = ST->getDarwinDirective(); // The 440 has no SIMD support, but floating-point instructions // have a 5-cycle latency, so unroll by 5x for latency hiding. diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 21acea1a36d..35e7a1497c8 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -81,7 +81,7 @@ public: bool enableAggressiveInterleaving(bool LoopHasReductions); unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector); - unsigned getMaxInterleaveFactor(); + unsigned getMaxInterleaveFactor(unsigned VF); unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, diff --git a/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp index 96edc417822..6dacc742b12 100644 --- a/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp @@ -76,7 +76,7 @@ unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) { unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool) { return 32; } -unsigned AMDGPUTTIImpl::getMaxInterleaveFactor() { +unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) { // Semi-arbitrary large amount. return 64; } diff --git a/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.h index 4abbdf20e76..791c84e6f28 100644 --- a/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.h @@ -70,7 +70,7 @@ public: unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector); - unsigned getMaxInterleaveFactor(); + unsigned getMaxInterleaveFactor(unsigned VF); }; } // end namespace llvm diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 5136619235b..17c86a7b9f0 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -66,7 +66,13 @@ unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) { } -unsigned X86TTIImpl::getMaxInterleaveFactor() { +unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) { + // If the loop will not be vectorized, don't interleave the loop. + // Let regular unroll to unroll the loop, which saves the overflow + // check and memory check cost. + if (VF == 1) + return 1; + if (ST->isAtom()) return 1; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 9f0adcfef62..e570bb55710 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -72,7 +72,7 @@ public: unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector); - unsigned getMaxInterleaveFactor(); + unsigned getMaxInterleaveFactor(unsigned VF); unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index cdd3c680e8d..011fd0f6fa8 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4160,7 +4160,7 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, std::max(1U, (R.MaxLocalUsers - 1))); // Clamp the unroll factor ranges to reasonable factors. - unsigned MaxInterleaveSize = TTI.getMaxInterleaveFactor(); + unsigned MaxInterleaveSize = TTI.getMaxInterleaveFactor(VF); // Check if the user has overridden the unroll max. if (VF == 1) { |