summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2017-04-24 19:40:59 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2017-04-24 19:40:59 +0000
commit0774ea267a2cae366a33434e2a0536db6428c3dc (patch)
tree70bfc17504073c2113cb0bc4fc6a5a8d9dfee583 /llvm/lib
parentdf6539f44b8b6a9bf93e5305e0bfbab9a707a443 (diff)
downloadbcm5719-llvm-0774ea267a2cae366a33434e2a0536db6428c3dc.tar.gz
bcm5719-llvm-0774ea267a2cae366a33434e2a0536db6428c3dc.zip
AMDGPU: Select scratch mubuf offsets when pointer is a constant
In call sequence setups, there may not be a frame index base and the pointer is a constant offset from the frame pointer / scratch wave offset register. llvm-svn: 301230
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp53
-rw-r--r--llvm/lib/Target/AMDGPU/BUFInstructions.td75
2 files changed, 94 insertions, 34 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 318de7f2e3d..eef59b343df 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -116,8 +116,11 @@ private:
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
SDValue &SLC) const;
- bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &ImmOffset) const;
+ bool SelectMUBUFScratchOffen(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
+ SDValue &SOffset, SDValue &ImmOffset) const;
+ bool SelectMUBUFScratchOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+ SDValue &Offset) const;
+
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
SDValue &Offset, SDValue &GLC, SDValue &SLC,
SDValue &TFE) const;
@@ -953,8 +956,12 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
return true;
}
+static bool isLegalMUBUFImmOffset(unsigned Imm) {
+ return isUInt<12>(Imm);
+}
+
static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
- return isUInt<12>(Imm->getZExtValue());
+ return isLegalMUBUFImmOffset(Imm->getZExtValue());
}
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
@@ -1076,9 +1083,9 @@ SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
return N;
}
-bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
- SDValue &VAddr, SDValue &SOffset,
- SDValue &ImmOffset) const {
+bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc,
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &ImmOffset) const {
SDLoc DL(Addr);
MachineFunction &MF = CurDAG->getMachineFunction();
@@ -1087,8 +1094,22 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
- // (add n0, c1)
+ if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+ unsigned Imm = CAddr->getZExtValue();
+ assert(!isLegalMUBUFImmOffset(Imm) &&
+ "should have been selected by other pattern");
+
+ SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
+ MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
+ DL, MVT::i32, HighBits);
+ VAddr = SDValue(MovHighBits, 0);
+ ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
+ return true;
+ }
+
if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ // (add n0, c1)
+
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
@@ -1107,6 +1128,24 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDValue Addr,
+ SDValue &SRsrc,
+ SDValue &SOffset,
+ SDValue &Offset) const {
+ ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
+ if (!CAddr || !isLegalMUBUFImmOffset(CAddr))
+ return false;
+
+ SDLoc DL(Addr);
+ MachineFunction &MF = CurDAG->getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
+ SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
+ return true;
+}
+
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &SOffset, SDValue &Offset,
SDValue &GLC, SDValue &SLC,
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index a6609f0725a..89eddb9ce96 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -11,7 +11,9 @@ def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
-def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
+def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen">;
+def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [], 20>;
+
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
@@ -958,21 +960,30 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, mubuf_az_extloadi8>;
} // End Predicates = [Has16BitInsts]
-class MUBUFScratchLoadPat <MUBUF_Pseudo Instr, ValueType vt, PatFrag ld> : Pat <
- (vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr,
- i32:$soffset, u16imm:$offset))),
- (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
->;
+multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen,
+ MUBUF_Pseudo InstrOffset,
+ ValueType vt, PatFrag ld> {
+ def : Pat <
+ (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
+ i32:$soffset, u16imm:$offset))),
+ (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+ >;
+
+ def : Pat <
+ (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
+ (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0)
+ >;
+}
-def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i32, extloadi8_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i16, sextloadi8_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i16, extloadi8_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, i32, sextloadi16_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, i32, extloadi16_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, i32, load_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, v2i32, load_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, v4i32, load_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i32, sextloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, extloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, BUFFER_LOAD_SSHORT_OFFSET, i32, sextloadi16_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, extloadi16_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, i32, load_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSET, v2i32, load_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, BUFFER_LOAD_DWORDX4_OFFSET, v4i32, load_private>;
// BUFFER_LOAD_DWORD*, addr64=0
multiclass MUBUF_Load_Dword <ValueType vt,
@@ -1054,19 +1065,29 @@ multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_global>;
defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET, i16, global_store>;
-class MUBUFScratchStorePat <MUBUF_Pseudo Instr, ValueType vt, PatFrag st> : Pat <
- (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
- u16imm:$offset)),
- (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
->;
+multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
+ MUBUF_Pseudo InstrOffset,
+ ValueType vt, PatFrag st> {
+ def : Pat <
+ (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
+ i32:$soffset, u16imm:$offset)),
+ (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+ >;
+
+ def : Pat <
+ (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
+ u16imm:$offset)),
+ (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0)
+ >;
+}
-def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i32, truncstorei16_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i16, truncstorei8_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i16, store_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i32, truncstorei8_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i32, truncstorei16_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i16, store_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET, i32, store_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private>;
//===----------------------------------------------------------------------===//
// MTBUF Patterns
OpenPOWER on IntegriCloud