summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp38
1 files changed, 24 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 87dcfafd275..73aa5cd0666 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1230,21 +1230,12 @@ bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
return true;
}
-bool SITargetLowering::allowsMisalignedMemoryAccesses(
- EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
- bool *IsFast) const {
+bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
+ unsigned Size, unsigned AddrSpace, unsigned Align,
+ MachineMemOperand::Flags Flags, bool *IsFast) const {
if (IsFast)
*IsFast = false;
- // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
- // which isn't a simple VT.
- // Until MVT is extended to handle this, simply check for the size and
- // rely on the condition below: allow accesses if the size is a multiple of 4.
- if (VT == MVT::Other || (VT != MVT::Other && VT.getSizeInBits() > 1024 &&
- VT.getStoreSize() > 16)) {
- return false;
- }
-
if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
// ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
@@ -1283,7 +1274,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
}
// Smaller than dword value must be aligned.
- if (VT.bitsLT(MVT::i32))
+ if (Size < 32)
return false;
// 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
@@ -1292,7 +1283,26 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
if (IsFast)
*IsFast = true;
- return VT.bitsGT(MVT::i32) && Align % 4 == 0;
+ return Size >= 32 && Align >= 4;
+}
+
+bool SITargetLowering::allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
+ bool *IsFast) const {
+ if (IsFast)
+ *IsFast = false;
+
+ // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
+ // which isn't a simple VT.
+ // Until MVT is extended to handle this, simply check for the size and
+ // rely on the condition below: allow accesses if the size is a multiple of 4.
+ if (VT == MVT::Other || (VT != MVT::Other && VT.getSizeInBits() > 1024 &&
+ VT.getStoreSize() > 16)) {
+ return false;
+ }
+
+ return allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
+ Align, Flags, IsFast);
}
EVT SITargetLowering::getOptimalMemOpType(
OpenPOWER on IntegriCloud