diff options
author | Hal Finkel <hfinkel@anl.gov> | 2015-09-04 00:10:41 +0000 |
---|---|---|
committer | Hal Finkel <hfinkel@anl.gov> | 2015-09-04 00:10:41 +0000 |
commit | 4a7be2397684f3c9d7d897392b96e68d4c086e76 (patch) | |
tree | 1157449f53cebfb889378171832e9784e418b802 /llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | |
parent | 824d1a975f9c532ccaf144bc4d8ad8cab66ea490 (diff) | |
download | bcm5719-llvm-4a7be2397684f3c9d7d897392b96e68d4c086e76.tar.gz bcm5719-llvm-4a7be2397684f3c9d7d897392b96e68d4c086e76.zip |
[PowerPC] Enable interleaved-access vectorization
This adds a basic cost model for interleaved-access vectorization (and a better
default for shuffles), and enables interleaved-access vectorization by default.
The relevant difference from the default cost model for interleaved-access
vectorization is that, on PPC, the shuffles that end up being used are *much*
cheaper than modeling the process with insert/extract pairs (which are
quite expensive, especially on older cores).
llvm-svn: 246824
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 38 |
1 files changed, 37 insertions, 1 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 124556e2d17..cd86dabd5ab 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -207,6 +207,10 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
   return LoopHasReductions;
 }
 
+bool PPCTTIImpl::enableInterleavedAccessVectorization() {
+  return true;
+}
+
 unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
   if (Vector && !ST->hasAltivec() && !ST->hasQPX())
     return 0;
@@ -266,7 +270,15 @@ int PPCTTIImpl::getArithmeticInstrCost(
 
 int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                Type *SubTp) {
-  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+  // Legalize the type.
+  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+  // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
+  // (at least in the sense that there need only be one non-loop-invariant
+  // instruction). We need one such shuffle instruction for each actual
+  // register (this is not true for arbitrary shuffles, but is true for the
+  // structured types of shuffles covered by TTI::ShuffleKind).
+  return LT.first;
 }
 
 int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
@@ -375,3 +387,27 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
   return Cost;
 }
 
+int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+                                           unsigned Factor,
+                                           ArrayRef<unsigned> Indices,
+                                           unsigned Alignment,
+                                           unsigned AddressSpace) {
+  assert(isa<VectorType>(VecTy) &&
+         "Expect a vector type for interleaved memory op");
+
+  // Legalize the type.
+  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
+
+  // Firstly, the cost of load/store operation.
+  int Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
+
+  // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
+  // (at least in the sense that there need only be one non-loop-invariant
+  // instruction). For each result vector, we need one shuffle per incoming
+  // vector (except that the first shuffle can take two incoming vectors
+  // because it does not need to take itself).
+  Cost += Factor*(LT.first-1);
+
+  return Cost;
+}
+