summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp 22
1 files changed, 14 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a20f8d00e82..8f36aaa2f45 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -438,24 +438,30 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
if (!VT.isSimple() || VT == MVT::Other)
return false;
- // TODO - CI+ supports unaligned memory accesses, but this requires driver
- // support.
-
- // XXX - The only mention I see of this in the ISA manual is for LDS direct
- // reads the "byte address and must be dword aligned". Is it also true for the
- // normal loads and stores?
- if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS) {
+ if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS) {
// ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
// aligned, 8 byte access in a single operation using ds_read2/write2_b32
// with adjacent offsets.
bool AlignedBy4 = (Align % 4 == 0);
if (IsFast)
*IsFast = AlignedBy4;
+
return AlignedBy4;
}
+ if (Subtarget->hasUnalignedBufferAccess()) {
+ // If we have a uniform constant load, it still requires using a slow
+ // buffer instruction if unaligned.
+ if (IsFast) {
+ *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS) ?
+ (Align % 4 == 0) : true;
+ }
+
+ return true;
+ }
+
// Smaller than dword value must be aligned.
- // FIXME: This should be allowed on CI+
if (VT.bitsLT(MVT::i32))
return false;
OpenPOWER on IntegriCloud