diff options
-rw-r--r-- | llvm/lib/Target/R600/SIISelLowering.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/R600/SIInstrFormats.td | 1 | ||||
-rw-r--r-- | llvm/lib/Target/R600/SIInstrInfo.td | 17 | ||||
-rw-r--r-- | llvm/lib/Target/R600/SIInstructions.td | 15 | ||||
-rw-r--r-- | llvm/lib/Target/R600/SILoadStoreOptimizer.cpp | 10 | ||||
-rw-r--r-- | llvm/lib/Target/R600/SILowerControlFlow.cpp | 23 | ||||
-rw-r--r-- | llvm/test/CodeGen/R600/shl_add_ptr.ll | 3 |
7 files changed, 30 insertions, 41 deletions
diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp index 8d4164a1c39..fb45684e4a4 100644 --- a/llvm/lib/Target/R600/SIISelLowering.cpp +++ b/llvm/lib/Target/R600/SIISelLowering.cpp @@ -1986,6 +1986,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, const SIInstrInfo *TII = static_cast<const SIInstrInfo *>( getTargetMachine().getSubtargetImpl()->getInstrInfo()); + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); TII->legalizeOperands(MI); if (TII->isMIMG(MI->getOpcode())) { @@ -2005,7 +2006,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, unsigned NewOpcode = TII->getMaskedMIMGOp(MI->getOpcode(), BitsSet); MI->setDesc(TII->get(NewOpcode)); - MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); MRI.setRegClass(VReg, RC); return; } diff --git a/llvm/lib/Target/R600/SIInstrFormats.td b/llvm/lib/Target/R600/SIInstrFormats.td index 10e0a3f0c13..ee1a52b2f8f 100644 --- a/llvm/lib/Target/R600/SIInstrFormats.td +++ b/llvm/lib/Target/R600/SIInstrFormats.td @@ -546,6 +546,7 @@ class DS <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : let LGKM_CNT = 1; let UseNamedOperandTable = 1; + let DisableEncoding = "$m0"; } class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : diff --git a/llvm/lib/Target/R600/SIInstrInfo.td b/llvm/lib/Target/R600/SIInstrInfo.td index 713e84edefd..392c272a863 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.td +++ b/llvm/lib/Target/R600/SIInstrInfo.td @@ -948,7 +948,7 @@ class DS_1A <bits<8> op, dag outs, dag ins, string asm, list<dag> pat> : class DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A < op, (outs regClass:$vdst), - (ins i1imm:$gds, VReg_32:$addr, ds_offset:$offset), + (ins i1imm:$gds, VReg_32:$addr, ds_offset:$offset, M0Reg:$m0), asm#" $vdst, $addr"#"$offset"#" [M0]", []> { let data0 = 0; @@ -960,7 +960,8 @@ class DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A < class DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS < op, (outs regClass:$vdst), - (ins i1imm:$gds, VReg_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1), + (ins i1imm:$gds, VReg_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1, + M0Reg:$m0), asm#" $vdst, $addr"#"$offset0"#"$offset1 [M0]", []> { let data0 = 0; @@ -973,7 +974,7 @@ class DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS < class DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A < op, (outs), - (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, ds_offset:$offset), + (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, ds_offset:$offset, M0Reg:$m0), asm#" $addr, $data0"#"$offset"#" [M0]", []> { let data1 = 0; @@ -986,7 +987,7 @@ class DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS < op, (outs), (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, regClass:$data1, - ds_offset0:$offset0, ds_offset1:$offset1), + ds_offset0:$offset0, ds_offset1:$offset1, M0Reg:$m0), asm#" $addr, $data0, $data1"#"$offset0"#"$offset1 [M0]", []> { let mayStore = 1; @@ -999,7 +1000,7 @@ class DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS < class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A < op, (outs rc:$vdst), - (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset), + (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0), asm#" $vdst, $addr, $data0"#"$offset"#" [M0]", []>, AtomicNoRet<noRetOp, 1> { @@ -1014,7 +1015,7 @@ class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = "" class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A < op, (outs rc:$vdst), - (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset), + (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0), asm#" $vdst, $addr, $data0, $data1"#"$offset"#" [M0]", []>, AtomicNoRet<noRetOp, 1> { @@ -1027,7 +1028,7 @@ class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = "" class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A < op, (outs), - (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset), + (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0), asm#" $addr, $data0, $data1"#"$offset"#" [M0]", []>, AtomicNoRet<noRetOp, 0> { @@ -1039,7 +1040,7 @@ class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp = class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A < op, (outs), - (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset), + (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0), asm#" $addr, $data0"#"$offset"#" [M0]", []>, AtomicNoRet<noRetOp, 0> { diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index bd91577a831..e1eb95580ac 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -2614,7 +2614,7 @@ def : ROTRPattern <V_ALIGNBIT_B32>; class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat < (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))), - (inst (i1 0), $ptr, (as_i16imm $offset)) + (inst (i1 0), $ptr, (as_i16imm $offset), (S_MOV_B32 -1)) >; def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>; @@ -2632,12 +2632,12 @@ def : DSReadPat <DS_READ_B64, v2i32, local_load_aligned8bytes>; def : Pat < (v2i32 (local_load (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))), - (DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1) + (DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1, (S_MOV_B32 -1)) >; class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat < (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)), - (inst (i1 0), $ptr, $value, (as_i16imm $offset)) + (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1)) >; def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>; @@ -2653,12 +2653,13 @@ def : Pat < (local_store v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)), (DS_WRITE2_B32 (i1 0), $ptr, (EXTRACT_SUBREG $value, sub0), - (EXTRACT_SUBREG $value, sub1), $offset0, $offset1) + (EXTRACT_SUBREG $value, sub1), $offset0, $offset1, + (S_MOV_B32 -1)) >; class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat < (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value), - (inst (i1 0), $ptr, $value, (as_i16imm $offset)) + (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1)) >; // Special case of DSAtomicRetPat for add / sub 1 -> inc / dec @@ -2674,13 +2675,13 @@ class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat < class DSAtomicIncRetPat<DS inst, ValueType vt, Instruction LoadImm, PatFrag frag> : Pat < (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)), - (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset)) + (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (S_MOV_B32 -1)) >; class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat < (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap), - (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset)) + (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset), (S_MOV_B32 -1)) >; diff --git a/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp b/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp index 4140196e752..a092bcc2daf 100644 --- a/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp @@ -222,6 +222,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( // Be careful, since the addresses could be subregisters themselves in weird // cases, like vectors of pointers. const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr); + const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0); unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg(); unsigned DestReg1 @@ -262,6 +263,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( .addOperand(*AddrReg) // addr .addImm(NewOffset0) // offset0 .addImm(NewOffset1) // offset1 + .addOperand(*M0Reg) // M0 .addMemOperand(*I->memoperands_begin()) .addMemOperand(*Paired->memoperands_begin()); @@ -280,6 +282,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg()); LIS->shrinkToUses(&AddrRegLI); + LiveInterval &M0RegLI = LIS->getInterval(M0Reg->getReg()); + LIS->shrinkToUses(&M0RegLI); + LIS->getInterval(DestReg); // Create new LI DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n'); @@ -295,6 +300,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair( // Be sure to use .addOperand(), and not .addReg() with these. We want to be // sure we preserve the subregister index and any register flags set on them. const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr); + const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0); const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0); const MachineOperand *Data1 = TII->getNamedOperand(*Paired, AMDGPU::OpName::data0); @@ -333,11 +339,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair( .addOperand(*Data1) // data1 .addImm(NewOffset0) // offset0 .addImm(NewOffset1) // offset1 + .addOperand(*M0Reg) // m0 .addMemOperand(*I->memoperands_begin()) .addMemOperand(*Paired->memoperands_begin()); // XXX - How do we express subregisters here? - unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg() }; + unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg(), + M0Reg->getReg()}; LIS->RemoveMachineInstrFromMaps(I); LIS->RemoveMachineInstrFromMaps(Paired); diff --git a/llvm/lib/Target/R600/SILowerControlFlow.cpp b/llvm/lib/Target/R600/SILowerControlFlow.cpp index 9702565c462..20e8cecdd29 100644 --- a/llvm/lib/Target/R600/SILowerControlFlow.cpp +++ b/llvm/lib/Target/R600/SILowerControlFlow.cpp @@ -88,7 +88,6 @@ private: void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); - void InitM0ForLDS(MachineBasicBlock::iterator MI); void LoadM0(MachineInstr &MI, MachineInstr *MovRel); void IndirectSrc(MachineInstr &MI); void IndirectDst(MachineInstr &MI); @@ -325,14 +324,6 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) { MI.eraseFromParent(); } -/// The m0 register stores the maximum allowable address for LDS reads and -/// writes. Its value must be at least the size in bytes of LDS allocated by -/// the shader. For simplicity, we set it to the maximum possible value. -void SILowerControlFlowPass::InitM0ForLDS(MachineBasicBlock::iterator MI) { - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), - AMDGPU::M0).addImm(0xffffffff); -} - void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { MachineBasicBlock &MBB = *MI.getParent(); @@ -391,12 +382,6 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { .addReg(Save); } - // FIXME: Are there any values other than the LDS address clamp that need to - // be stored in the m0 register and may be live for more than a few - // instructions? If so, we should save the m0 register at the beginning - // of this function and restore it here. - // FIXME: Add support for LDS direct loads. - InitM0ForLDS(&MI); MI.eraseFromParent(); } @@ -465,7 +450,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { MachineInstr &MI = *I; if (TII->isDS(MI.getOpcode())) { - NeedM0 = true; NeedWQM = true; } @@ -544,13 +528,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { } } - if (NeedM0) { - MachineBasicBlock &MBB = MF.front(); - // Initialize M0 to a value that won't cause LDS access to be discarded - // due to offset clamping - InitM0ForLDS(MBB.getFirstNonPHI()); - } - if (NeedWQM && MFI->getShaderType() == ShaderType::PIXEL) { MachineBasicBlock &MBB = MF.front(); BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64), diff --git a/llvm/test/CodeGen/R600/shl_add_ptr.ll b/llvm/test/CodeGen/R600/shl_add_ptr.ll index 047cf252e78..fdb3d390883 100644 --- a/llvm/test/CodeGen/R600/shl_add_ptr.ll +++ b/llvm/test/CodeGen/R600/shl_add_ptr.ll @@ -68,7 +68,8 @@ define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3) ; pointer can be used with an offset into the second one. ; SI-LABEL: {{^}}load_shl_base_lds_2: -; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}} +; SI: s_mov_b32 m0, -1 +; SI-NEXT: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}} ; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9 [M0] ; SI: s_endpgm define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 { |