diff options
| field | value | date |
|---|---|---|
| author | Marek Olsak <marek.olsak@amd.com> | 2017-10-31 21:06:42 +0000 |
| committer | Marek Olsak <marek.olsak@amd.com> | 2017-10-31 21:06:42 +0000 |
| commit | 5914ece6aacebf22e196c2a849026541724e9162 (patch) | |
| tree | 866e2e816854d39e744ce30a9707cd2e03558c7e | |
| parent | deb437b038ffad2f23fc21dd3ffb2b66cdd76b02 (diff) | |
| download | bcm5719-llvm-5914ece6aacebf22e196c2a849026541724e9162.tar.gz bcm5719-llvm-5914ece6aacebf22e196c2a849026541724e9162.zip | |
AMDGPU: Select s_buffer_load_dword with a non-constant SGPR offset
Summary:
Apps that benefit:
- alien isolation
- bioshock infinite
- civilization: beyond earth
- company of heroes 2
- dirt showdown
- dota 2
- F1 2015
- grid autosport
- hitman
- legend of grimrock
- serious sam 3: bfe
- shadow warrior
- talos principle
- total war: warhammer
- UE4 demos: effects cave, elemental, sun temple
Reviewers: arsenm, nhaehnle
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D38914
llvm-svn: 317038
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 8 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 7 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 21 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SMInstructions.td | 3 |
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/smrd.ll | 16 |
5 files changed, 38 insertions, 17 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 1e4992555dc..c313e4a04ef 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -169,7 +169,6 @@ private: bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; - bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const; bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const; @@ -1466,13 +1465,6 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, return !Imm && isa<ConstantSDNode>(Offset); } -bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr, - SDValue &Offset) const { - bool Imm; - return SelectSMRDOffset(Addr, Offset, Imm) && !Imm && - !isa<ConstantSDNode>(Offset); -} - bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const { diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 6eb39aee893..351f52b6241 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -878,13 +878,6 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol", // MUBUF Patterns //===----------------------------------------------------------------------===// -// Offset in an 32-bit VGPR -def : GCNPat < - (SIload_constant v4i32:$sbase, i32:$voff), - (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, (i32 0), 0, 0, 0, 0) ->; - - //===----------------------------------------------------------------------===// // buffer_load/store_format patterns //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 
06de0658a7d..542bfdaffbf 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3709,6 +3709,27 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32); Inst.eraseFromParent(); continue; + + case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR: { + unsigned VDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + + BuildMI(*MBB, Inst, Inst.getDebugLoc(), + get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst) + .add(*getNamedOperand(Inst, AMDGPU::OpName::soff)) // vaddr + .add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc + .addImm(0) // soffset + .addImm(0) // offset + .addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm()) + .addImm(0) // slc + .addImm(0) // tfe + .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end()); + + MRI.replaceRegWith(getNamedOperand(Inst, AMDGPU::OpName::sdst)->getReg(), + VDst); + addUsersToMoveToVALUWorklist(VDst, MRI, Worklist); + Inst.eraseFromParent(); + continue; + } } if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td index 131cd2f990f..5e72a2e8828 100644 --- a/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -239,7 +239,6 @@ def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">; def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">; def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">; def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">; -def SMRDBufferSgpr : ComplexPattern<i32, 1, "SelectSMRDBufferSgpr">; multiclass SMRD_Pattern <string Instr, ValueType vt> { @@ -282,7 +281,7 @@ def SM_LOAD_PATTERN : GCNPat < // name this pattern to reuse AddedComplexity on // 2. 
Offset loaded in an 32bit SGPR def : GCNPat < - (SIload_constant v4i32:$sbase, (SMRDBufferSgpr i32:$offset)), + (SIload_constant v4i32:$sbase, i32:$offset), (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset, 0) >; diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll index a19768d72bd..cc2f6ce76b5 100644 --- a/llvm/test/CodeGen/AMDGPU/smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/smrd.ll @@ -175,6 +175,22 @@ main_body: ret void } +; GCN-LABEL: {{^}}smrd_sgpr_offset: +; GCN: s_buffer_load_dword s{{[0-9]}}, s[0:3], s4 +define amdgpu_ps float @smrd_sgpr_offset(<4 x i32> inreg %desc, i32 inreg %offset) #0 { +main_body: + %r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %offset) + ret float %r +} + +; GCN-LABEL: {{^}}smrd_vgpr_offset: +; GCN: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ; +define amdgpu_ps float @smrd_vgpr_offset(<4 x i32> inreg %desc, i32 %offset) #0 { +main_body: + %r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %offset) + ret float %r +} + declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1 |

