diff options
author | Mark Searles <m.c.searles@gmail.com> | 2018-02-19 16:42:49 +0000 |
---|---|---|
committer | Mark Searles <m.c.searles@gmail.com> | 2018-02-19 16:42:49 +0000 |
commit | 419bdab7595ff213c326f40683b1dca78c5fff03 (patch) | |
tree | 3d364c7700c810b7cabe7961904561039ef056dd /llvm/lib | |
parent | bc35f069f4c88d355488466f9c2d7953c9202168 (diff) | |
download | bcm5719-llvm-419bdab7595ff213c326f40683b1dca78c5fff03.tar.gz bcm5719-llvm-419bdab7595ff213c326f40683b1dca78c5fff03.zip |
[AMDGPU] Increased vector length for global/constant loads.
Summary: GCN ISA supports instructions that can read 16 consecutive dwords from memory through the scalar data cache; loadstoreVectorizer should take advantage of the wider vector length and pack 16/8 elements of dwords/quadwords.
Author: FarhanaAleen
Reviewed By: rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D43275
llvm-svn: 325518
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 30 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 6 |
2 files changed, 34 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 00ff0308ba1..f478ffbc511 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -233,12 +233,38 @@ unsigned AMDGPUTTIImpl::getMinVectorRegisterBitWidth() const { return 32; } +unsigned AMDGPUTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize, + unsigned ChainSizeInBytes, + VectorType *VecTy) const { + unsigned VecRegBitWidth = VF * LoadSize; + if (VecRegBitWidth > 128 && VecTy->getScalarSizeInBits() < 32) + // TODO: Support element-size less than 32bit? + return 128 / LoadSize; + + return VF; +} + +unsigned AMDGPUTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize, + unsigned ChainSizeInBytes, + VectorType *VecTy) const { + unsigned VecRegBitWidth = VF * StoreSize; + if (VecRegBitWidth > 128) + return 128 / StoreSize; + + return VF; +} + unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { AMDGPUAS AS = ST->getAMDGPUAS(); if (AddrSpace == AS.GLOBAL_ADDRESS || AddrSpace == AS.CONSTANT_ADDRESS || - AddrSpace == AS.CONSTANT_ADDRESS_32BIT || - AddrSpace == AS.FLAT_ADDRESS) + AddrSpace == AS.CONSTANT_ADDRESS_32BIT) { + if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) + return 128; + return 512; + } + + if (AddrSpace == AS.FLAT_ADDRESS) return 128; if (AddrSpace == AS.LOCAL_ADDRESS || AddrSpace == AS.REGION_ADDRESS) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 8899d2c6da8..a112757173d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -118,6 +118,12 @@ public: unsigned getNumberOfRegisters(bool Vector) const; unsigned getRegisterBitWidth(bool Vector) const; unsigned getMinVectorRegisterBitWidth() const; + unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, + unsigned ChainSizeInBytes, + VectorType *VecTy) const; + unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, + unsigned ChainSizeInBytes, + VectorType *VecTy) const; unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, |