diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 38 |
1 files changed, 24 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 87dcfafd275..73aa5cd0666 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1230,21 +1230,12 @@ bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT, return true; } -bool SITargetLowering::allowsMisalignedMemoryAccesses( - EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, - bool *IsFast) const { +bool SITargetLowering::allowsMisalignedMemoryAccessesImpl( + unsigned Size, unsigned AddrSpace, unsigned Align, + MachineMemOperand::Flags Flags, bool *IsFast) const { if (IsFast) *IsFast = false; - // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96, - // which isn't a simple VT. - // Until MVT is extended to handle this, simply check for the size and - // rely on the condition below: allow accesses if the size is a multiple of 4. - if (VT == MVT::Other || (VT != MVT::Other && VT.getSizeInBits() > 1024 && - VT.getStoreSize() > 16)) { - return false; - } - if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || AddrSpace == AMDGPUAS::REGION_ADDRESS) { // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte @@ -1283,7 +1274,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses( } // Smaller than dword value must be aligned. - if (VT.bitsLT(MVT::i32)) + if (Size < 32) return false; // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the @@ -1292,7 +1283,26 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses( if (IsFast) *IsFast = true; - return VT.bitsGT(MVT::i32) && Align % 4 == 0; + return Size >= 32 && Align >= 4; +} + +bool SITargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, + bool *IsFast) const { + if (IsFast) + *IsFast = false; + + // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96, + // which isn't a simple VT. + // Until MVT is extended to handle this, simply check for the size and + // rely on the condition below: allow accesses if the size is a multiple of 4. + if (VT == MVT::Other || (VT != MVT::Other && VT.getSizeInBits() > 1024 && + VT.getStoreSize() > 16)) { + return false; + } + + return allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace, + Align, Flags, IsFast); } EVT SITargetLowering::getOptimalMemOpType( |