summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorMark Searles <m.c.searles@gmail.com>2018-02-19 16:42:49 +0000
committerMark Searles <m.c.searles@gmail.com>2018-02-19 16:42:49 +0000
commit419bdab7595ff213c326f40683b1dca78c5fff03 (patch)
tree3d364c7700c810b7cabe7961904561039ef056dd /llvm/lib/Target
parentbc35f069f4c88d355488466f9c2d7953c9202168 (diff)
downloadbcm5719-llvm-419bdab7595ff213c326f40683b1dca78c5fff03.tar.gz
bcm5719-llvm-419bdab7595ff213c326f40683b1dca78c5fff03.zip
[AMDGPU] Increased vector length for global/constant loads.
Summary: The GCN ISA supports instructions that can read 16 consecutive dwords from memory through the scalar data cache; the LoadStoreVectorizer should take advantage of this wider vector length and pack up to 16 dwords or 8 quadwords per access. Author: FarhanaAleen. Reviewed By: rampitec. Subscribers: llvm-commits, AMDGPU. Differential Revision: https://reviews.llvm.org/D43275 llvm-svn: 325518
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp30
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h6
2 files changed, 34 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 00ff0308ba1..f478ffbc511 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -233,12 +233,38 @@ unsigned AMDGPUTTIImpl::getMinVectorRegisterBitWidth() const {
return 32;
}
+unsigned AMDGPUTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize, // Returns the vectorization factor to use for a load chain: VF, or a reduced value.
+ unsigned ChainSizeInBytes, // Not read in this body.
+ VectorType *VecTy) const { // Only the scalar element size of VecTy is consulted.
+ unsigned VecRegBitWidth = VF * LoadSize; // Total requested width; presumably in bits, going by the name -- confirm against the caller.
+ if (VecRegBitWidth > 128 && VecTy->getScalarSizeInBits() < 32) // Clamp only when elements are narrower than 32 bits; wider elements keep VF even above 128.
+ // TODO: Support element-size less than 32bit?
+ return 128 / LoadSize; // Largest factor whose total width (factor * LoadSize) does not exceed 128.
+
+ return VF; // No clamp applied: hand back the caller's factor unchanged.
+}
+
+unsigned AMDGPUTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize, // Returns the vectorization factor to use for a store chain: VF, or a reduced value.
+ unsigned ChainSizeInBytes, // Not read in this body.
+ VectorType *VecTy) const { // VecTy is likewise not read here; unlike the load variant, element size plays no role.
+ unsigned VecRegBitWidth = VF * StoreSize; // Total requested width; presumably in bits, going by the name -- confirm against the caller.
+ if (VecRegBitWidth > 128) // Any request wider than 128 is clamped, regardless of element type.
+ return 128 / StoreSize; // Largest factor whose total width (factor * StoreSize) does not exceed 128.
+
+ return VF; // Within the 128 limit: keep the caller's factor.
+}
+
unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
AMDGPUAS AS = ST->getAMDGPUAS();
if (AddrSpace == AS.GLOBAL_ADDRESS ||
AddrSpace == AS.CONSTANT_ADDRESS ||
- AddrSpace == AS.CONSTANT_ADDRESS_32BIT ||
- AddrSpace == AS.FLAT_ADDRESS)
+ AddrSpace == AS.CONSTANT_ADDRESS_32BIT) {
+ if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ return 128;
+ return 512;
+ }
+
+ if (AddrSpace == AS.FLAT_ADDRESS)
return 128;
if (AddrSpace == AS.LOCAL_ADDRESS ||
AddrSpace == AS.REGION_ADDRESS)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 8899d2c6da8..a112757173d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -118,6 +118,12 @@ public:
unsigned getNumberOfRegisters(bool Vector) const;
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getMinVectorRegisterBitWidth() const;
+ unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const;
+ unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const;
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
OpenPOWER on IntegriCloud