author     Matt Arsenault <Matthew.Arsenault@amd.com>    2015-06-04 16:17:42 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>    2015-06-04 16:17:42 +0000
commit     73e06fa262a1c05a09e53e6d5394b73ea875c285 (patch)
tree       1b366ad178af5313f13da035a53e4b7031cc31b5 /llvm/lib
parent     f72b49bc17cb57e7acdb78657e566c9bfe8d2907 (diff)
R600/SI: Reimplement isLegalAddressingMode
Now that we sometimes know the address space, this can
theoretically do a better job.
This needs better test coverage, but that mostly depends on
first updating the loop optimizations to provide the address
space.
llvm-svn: 239053
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/R600/AMDGPU.h            5
-rw-r--r--  llvm/lib/Target/R600/SIISelLowering.cpp  96
2 files changed, 70 insertions, 31 deletions
diff --git a/llvm/lib/Target/R600/AMDGPU.h b/llvm/lib/Target/R600/AMDGPU.h
index 9b360637203..f014d7ac48c 100644
--- a/llvm/lib/Target/R600/AMDGPU.h
+++ b/llvm/lib/Target/R600/AMDGPU.h
@@ -137,7 +137,10 @@ enum AddressSpaces {
   CONSTANT_BUFFER_14 = 22,
   CONSTANT_BUFFER_15 = 23,
   ADDRESS_NONE = 24, ///< Address space for unknown memory.
-  LAST_ADDRESS = ADDRESS_NONE
+  LAST_ADDRESS = ADDRESS_NONE,
+
+  // Some places use this if the address space can't be determined.
+  UNKNOWN_ADDRESS_SPACE = ~0u
 };
 
 } // namespace AMDGPUAS
diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp
index 565e371d366..28694103c71 100644
--- a/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/llvm/lib/Target/R600/SIISelLowering.cpp
@@ -250,47 +250,83 @@ bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
   return false;
 }
 
-// FIXME: This really needs an address space argument. The immediate offset
-// size is different for different sets of memory instruction sets.
-
-// The single offset DS instructions have a 16-bit unsigned byte offset.
-//
-// MUBUF / MTBUF have a 12-bit unsigned byte offset, and additionally can do r +
-// r + i with addr64. 32-bit has more addressing mode options. Depending on the
-// resource constant, it can also do (i64 r0) + (i32 r1) * (i14 i).
-//
-// SMRD instructions have an 8-bit, dword offset.
-//
 bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              Type *Ty, unsigned AS) const {
   // No global is ever allowed as a base.
   if (AM.BaseGV)
     return false;
 
-  // Allow a 16-bit unsigned immediate field, since this is what DS instructions
-  // use.
-  if (!isUInt<16>(AM.BaseOffs))
-    return false;
+  switch (AS) {
+  case AMDGPUAS::GLOBAL_ADDRESS:
+  case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
+  case AMDGPUAS::PRIVATE_ADDRESS:
+  case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: {
+    // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
+    // additionally can do r + r + i with addr64. 32-bit has more addressing
+    // mode options. Depending on the resource constant, it can also do
+    // (i64 r0) + (i32 r1) * (i14 i).
+    //
+    // SMRD instructions have an 8-bit, dword offset.
+    //
+    // Assume nonunifom access, since the address space isn't enough to know
+    // what instruction we will use, and since we don't know if this is a load
+    // or store and scalar stores are only available on VI.
+    //
+    // We also know if we are doing an extload, we can't do a scalar load.
+    //
+    // Private arrays end up using a scratch buffer most of the time, so also
+    // assume those use MUBUF instructions. Scratch loads / stores are currently
+    // implemented as mubuf instructions with offen bit set, so slightly
+    // different than the normal addr64.
+    if (!isUInt<12>(AM.BaseOffs))
+      return false;
 
-  // Only support r+r,
-  switch (AM.Scale) {
-  case 0: // "r+i" or just "i", depending on HasBaseReg.
-    break;
-  case 1:
-    if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+    // FIXME: Since we can split immediate into soffset and immediate offset,
+    // would it make sense to allow any immediate?
+
+    switch (AM.Scale) {
+    case 0: // r + i or just i, depending on HasBaseReg.
+      return true;
+    case 1:
+      return true; // We have r + r or r + i.
+    case 2:
+      if (AM.HasBaseReg) {
+        // Reject 2 * r + r.
+        return false;
+      }
+
+      // Allow 2 * r as r + r
+      // Or 2 * r + i is allowed as r + r + i.
+      return true;
+    default: // Don't allow n * r
       return false;
-    // Otherwise we have r+r or r+i.
-    break;
-  case 2:
-    if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+    }
+  }
+  case AMDGPUAS::LOCAL_ADDRESS:
+  case AMDGPUAS::REGION_ADDRESS: {
+    // Basic, single offset DS instructions allow a 16-bit unsigned immediate
+    // field.
+    // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
+    // an 8-bit dword offset but we don't know the alignment here.
+    if (!isUInt<16>(AM.BaseOffs))
       return false;
-    // Allow 2*r as r+r.
-    break;
-  default: // Don't allow n * r
+
+    if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
+      return true;
+
+    if (AM.Scale == 1 && AM.HasBaseReg)
+      return true;
+
     return false;
   }
-
-  return true;
+  case AMDGPUAS::FLAT_ADDRESS: {
+    // Flat instructions do not have offsets, and only have the register
+    // address.
+    return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
+  }
+  default:
+    llvm_unreachable("unhandled address space");
+  }
 }
 
 bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
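
To make the effect of the per-address-space handling concrete, below is a minimal standalone sketch in plain C++ (not LLVM code) of the offset rules the new switch encodes. The enum values, the AddrMode fields, and the isUIntN helper are simplified stand-ins for AMDGPUAS::*, TargetLowering::AddrMode, and LLVM's isUInt; the sketch omits UNKNOWN_ADDRESS_SPACE and the scalar-load/extload caveats discussed in the diff's comments.

// Standalone sketch of the per-address-space addressing-mode rules.
// Names and values are simplified stand-ins, not the LLVM definitions.
#include <cstdint>
#include <cstdio>

enum AddressSpace { Global, Constant, Private, Local, Region, Flat };

struct AddrMode {
  int64_t BaseOffs = 0;  // constant byte offset
  bool HasBaseReg = false;
  int64_t Scale = 0;     // 0 = no scaled register, 1 = plain register, ...
};

// True if X fits in an N-bit unsigned immediate.
static bool isUIntN(unsigned N, int64_t X) {
  return X >= 0 && static_cast<uint64_t>(X) < (uint64_t(1) << N);
}

// Mirrors the shape of the new SITargetLowering::isLegalAddressingMode.
static bool isLegalAddressingMode(const AddrMode &AM, AddressSpace AS) {
  switch (AS) {
  case Global:
  case Constant:
  case Private:
    // MUBUF / MTBUF: 12-bit unsigned byte offset, r + r (+ i) via addr64.
    if (!isUIntN(12, AM.BaseOffs))
      return false;
    return AM.Scale == 0 || AM.Scale == 1 ||
           (AM.Scale == 2 && !AM.HasBaseReg); // 2 * r folds to r + r
  case Local:
  case Region:
    // DS: 16-bit unsigned byte offset, at most one register address.
    if (!isUIntN(16, AM.BaseOffs))
      return false;
    return AM.Scale == 0 || (AM.Scale == 1 && AM.HasBaseReg);
  case Flat:
    // FLAT: register address only, no immediate offset.
    return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
  }
  return false;
}

int main() {
  AddrMode AM;
  AM.BaseOffs = 0x2000; // 8192: fits in 16 bits but not in 12
  AM.HasBaseReg = true;
  std::printf("global: %d  local: %d\n",
              isLegalAddressingMode(AM, Global), // 0: exceeds 12-bit MUBUF offset
              isLegalAddressingMode(AM, Local)); // 1: fits 16-bit DS offset
}

With BaseOffs = 8192 the same addressing mode is rejected on the MUBUF-style global path (the offset exceeds 12 bits) but accepted on the DS-style local path (it fits in 16 bits), which is exactly the distinction the old, address-space-agnostic implementation could not make.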