summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
diff options
context:
space:
mode:
authorHal Finkel <hfinkel@anl.gov>2015-09-04 00:10:41 +0000
committerHal Finkel <hfinkel@anl.gov>2015-09-04 00:10:41 +0000
commit4a7be2397684f3c9d7d897392b96e68d4c086e76 (patch)
tree1157449f53cebfb889378171832e9784e418b802 /llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
parent824d1a975f9c532ccaf144bc4d8ad8cab66ea490 (diff)
downloadbcm5719-llvm-4a7be2397684f3c9d7d897392b96e68d4c086e76.tar.gz
bcm5719-llvm-4a7be2397684f3c9d7d897392b96e68d4c086e76.zip
[PowerPC] Enable interleaved-access vectorization
This adds a basic cost model for interleaved-access vectorization (and a better default for shuffles), and enables interleaved-access vectorization by default. The relevant difference from the default cost model for interleaved-access vectorization is that, on PPC, the shuffles that end up being used are *much* cheaper than modeling the process with insert/extract pairs (which are quite expensive, especially on older cores). llvm-svn: 246824
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp')
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp38
1 files changed, 37 insertions, 1 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 124556e2d17..cd86dabd5ab 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -207,6 +207,10 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
return LoopHasReductions;
}
+bool PPCTTIImpl::enableInterleavedAccessVectorization() {
+ return true; // Always enabled for PPC; no subtarget feature is checked here.
+}
+
unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
if (Vector && !ST->hasAltivec() && !ST->hasQPX())
return 0;
@@ -266,7 +270,15 @@ int PPCTTIImpl::getArithmeticInstrCost(
int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ // Legalize the type; LT.first is used below as the per-register count.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+ // PPC, for both Altivec/VSX and QPX, supports cheap arbitrary permutations
+ // (at least in the sense that there need only be one non-loop-invariant
+ // instruction). We need one such shuffle instruction for each actual
+ // register (this is not true for arbitrary shuffles, but is true for the
+ // structured types of shuffles covered by TTI::ShuffleKind).
+ return LT.first; // Kind, Index and SubTp are not read by this cost model.
}
int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
@@ -375,3 +387,27 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
return Cost;
}
+int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
+ assert(isa<VectorType>(VecTy) &&
+ "Expect a vector type for interleaved memory op");
+
+ // Legalize the type; LT.first is used below as the incoming-vector count.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
+
+ // First, the cost of the load/store itself (Indices is not consulted).
+ int Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
+
+ // PPC, for both Altivec/VSX and QPX, supports cheap arbitrary permutations
+ // (at least in the sense that there need only be one non-loop-invariant
+ // instruction). For each result vector, we need one shuffle per incoming
+ // vector (except that the first shuffle can take two incoming vectors
+ // because it does not need to take itself).
+ Cost += Factor*(LT.first-1);
+
+ return Cost;
+}
+
OpenPOWER on IntegriCloud