diff options
author | Farhana Aleen <farhana.aleen@gmail.com> | 2018-03-07 17:09:18 +0000 |
---|---|---|
committer | Farhana Aleen <farhana.aleen@gmail.com> | 2018-03-07 17:09:18 +0000 |
commit | 89196642f72cc3325963918c0426ce128c414104 (patch) | |
tree | 1c0fb19b4dcb115932854c6cfc86768b38a5e136 /llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | |
parent | c0e768df9050133466695f3a8f3bf0b3bb343987 (diff) | |
download | bcm5719-llvm-89196642f72cc3325963918c0426ce128c414104.tar.gz bcm5719-llvm-89196642f72cc3325963918c0426ce128c414104.zip |
[AMDGPU] Increased vector length for global/constant loads.
Summary: The GCN ISA supports instructions that can read 16 consecutive dwords from memory through the scalar data cache;
the LoadStoreVectorizer should take advantage of the wider vector length and pack 16/8 elements of dwords/quadwords.
Author: FarhanaAleen
Reviewed By: rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D44179
llvm-svn: 326910
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 34 |
1 files changed, 30 insertions, 4 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 6d6ab084ee6..4292575c601 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -234,12 +234,38 @@ unsigned AMDGPUTTIImpl::getMinVectorRegisterBitWidth() const {
   return 32;
 }
 
+unsigned AMDGPUTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
+                                            unsigned ChainSizeInBytes,
+                                            VectorType *VecTy) const {
+  unsigned VecRegBitWidth = VF * LoadSize;
+  if (VecRegBitWidth > 128 && VecTy->getScalarSizeInBits() < 32)
+    // TODO: Support element-size less than 32bit?
+    return 128 / LoadSize;
+
+  return VF;
+}
+
+unsigned AMDGPUTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
+                                             unsigned ChainSizeInBytes,
+                                             VectorType *VecTy) const {
+  unsigned VecRegBitWidth = VF * StoreSize;
+  if (VecRegBitWidth > 128)
+    return 128 / StoreSize;
+
+  return VF;
+}
+
 unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
   AMDGPUAS AS = ST->getAMDGPUAS();
   if (AddrSpace == AS.GLOBAL_ADDRESS ||
       AddrSpace == AS.CONSTANT_ADDRESS ||
-      AddrSpace == AS.CONSTANT_ADDRESS_32BIT ||
-      AddrSpace == AS.FLAT_ADDRESS)
+      AddrSpace == AS.CONSTANT_ADDRESS_32BIT) {
+    if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+      return 128;
+    return 512;
+  }
+
+  if (AddrSpace == AS.FLAT_ADDRESS)
     return 128;
   if (AddrSpace == AS.LOCAL_ADDRESS ||
       AddrSpace == AS.REGION_ADDRESS)
@@ -250,8 +276,8 @@ unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
   if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS &&
       (AddrSpace == AS.PARAM_D_ADDRESS ||
       AddrSpace == AS.PARAM_I_ADDRESS ||
-      (AddrSpace >= AS.CONSTANT_BUFFER_0 &&
-      AddrSpace <= AS.CONSTANT_BUFFER_15)))
+       (AddrSpace >= AS.CONSTANT_BUFFER_0 &&
+        AddrSpace <= AS.CONSTANT_BUFFER_15)))
     return 128;
   llvm_unreachable("unhandled address space");
 }
```