diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-05-10 21:29:33 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-05-10 21:29:33 +0000 |
commit | 3c5e4237c67e1ca8900e253fe4c8d7fc448c93e8 (patch) | |
tree | f329a2e02e0a30aedb742396d961585fdef0f642 /llvm/lib | |
parent | acdc7659cc274a46dfef4db5ccdfcafadfda71c4 (diff) | |
download | bcm5719-llvm-3c5e4237c67e1ca8900e253fe4c8d7fc448c93e8.tar.gz bcm5719-llvm-3c5e4237c67e1ca8900e253fe4c8d7fc448c93e8.zip |
AMDGPU: Make some packed shuffles free
VOP3P instructions can encode access to either
half of the register.
llvm-svn: 302730
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 34 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 3 |
2 files changed, 36 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index c9482c37ec8..beafebc1284 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -363,13 +363,22 @@ int AMDGPUTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index) { switch (Opcode) { case Instruction::ExtractElement: - case Instruction::InsertElement: + case Instruction::InsertElement: { + unsigned EltSize + = DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType()); + if (EltSize < 32) { + if (EltSize == 16 && Index == 0 && ST->has16BitInsts()) + return 0; + return BaseT::getVectorInstrCost(Opcode, ValTy, Index); + } + // Extracts are just reads of a subregister, so are free. Inserts are // considered free because we don't want to have any cost for scalarizing // operations, and we don't have to copy into a different register class. // Dynamic indexing isn't free and is best avoided. return Index == ~0u ? 2 : 0; + } default: return BaseT::getVectorInstrCost(Opcode, ValTy, Index); } @@ -479,3 +488,26 @@ bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const { return false; } + +unsigned AMDGPUTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) { + if (ST->hasVOP3PInsts()) { + VectorType *VT = cast<VectorType>(Tp); + if (VT->getNumElements() == 2 && + DL.getTypeSizeInBits(VT->getElementType()) == 16) { + // With op_sel VOP3P instructions freely can access the low half or high + // half of a register, so any swizzle is free. + + switch (Kind) { + case TTI::SK_Broadcast: + case TTI::SK_Reverse: + case TTI::SK_PermuteSingleSrc: + return 0; + default: + break; + } + } + } + + return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 71d6306bc1a..e0024e21e82 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -114,6 +114,9 @@ public: } unsigned getVectorSplitCost() { return 0; } + + unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp); }; } // end namespace llvm |