summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/R600/SIISelLowering.cpp | 2
-rw-r--r-- llvm/lib/Target/R600/SIInstrFormats.td | 1
-rw-r--r-- llvm/lib/Target/R600/SIInstrInfo.td | 17
-rw-r--r-- llvm/lib/Target/R600/SIInstructions.td | 15
-rw-r--r-- llvm/lib/Target/R600/SILoadStoreOptimizer.cpp | 10
-rw-r--r-- llvm/lib/Target/R600/SILowerControlFlow.cpp | 23
-rw-r--r-- llvm/test/CodeGen/R600/shl_add_ptr.ll | 3
7 files changed, 30 insertions, 41 deletions
diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp
index 8d4164a1c39..fb45684e4a4 100644
--- a/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/llvm/lib/Target/R600/SIISelLowering.cpp
@@ -1986,6 +1986,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
TII->legalizeOperands(MI);
if (TII->isMIMG(MI->getOpcode())) {
@@ -2005,7 +2006,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
unsigned NewOpcode = TII->getMaskedMIMGOp(MI->getOpcode(), BitsSet);
MI->setDesc(TII->get(NewOpcode));
- MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
MRI.setRegClass(VReg, RC);
return;
}
diff --git a/llvm/lib/Target/R600/SIInstrFormats.td b/llvm/lib/Target/R600/SIInstrFormats.td
index 10e0a3f0c13..ee1a52b2f8f 100644
--- a/llvm/lib/Target/R600/SIInstrFormats.td
+++ b/llvm/lib/Target/R600/SIInstrFormats.td
@@ -546,6 +546,7 @@ class DS <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
let LGKM_CNT = 1;
let UseNamedOperandTable = 1;
+ let DisableEncoding = "$m0";
}
class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
diff --git a/llvm/lib/Target/R600/SIInstrInfo.td b/llvm/lib/Target/R600/SIInstrInfo.td
index 713e84edefd..392c272a863 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.td
+++ b/llvm/lib/Target/R600/SIInstrInfo.td
@@ -948,7 +948,7 @@ class DS_1A <bits<8> op, dag outs, dag ins, string asm, list<dag> pat> :
class DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
op,
(outs regClass:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, ds_offset:$offset),
+ (ins i1imm:$gds, VReg_32:$addr, ds_offset:$offset, M0Reg:$m0),
asm#" $vdst, $addr"#"$offset"#" [M0]",
[]> {
let data0 = 0;
@@ -960,7 +960,8 @@ class DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
class DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
op,
(outs regClass:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1),
+ (ins i1imm:$gds, VReg_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
+ M0Reg:$m0),
asm#" $vdst, $addr"#"$offset0"#"$offset1 [M0]",
[]> {
let data0 = 0;
@@ -973,7 +974,7 @@ class DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
class DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
op,
(outs),
- (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, ds_offset:$offset),
+ (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, ds_offset:$offset, M0Reg:$m0),
asm#" $addr, $data0"#"$offset"#" [M0]",
[]> {
let data1 = 0;
@@ -986,7 +987,7 @@ class DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
op,
(outs),
(ins i1imm:$gds, VReg_32:$addr, regClass:$data0, regClass:$data1,
- ds_offset0:$offset0, ds_offset1:$offset1),
+ ds_offset0:$offset0, ds_offset1:$offset1, M0Reg:$m0),
asm#" $addr, $data0, $data1"#"$offset0"#"$offset1 [M0]",
[]> {
let mayStore = 1;
@@ -999,7 +1000,7 @@ class DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A <
op,
(outs rc:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset),
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
asm#" $vdst, $addr, $data0"#"$offset"#" [M0]", []>,
AtomicNoRet<noRetOp, 1> {
@@ -1014,7 +1015,7 @@ class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""
class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A <
op,
(outs rc:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset),
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
asm#" $vdst, $addr, $data0, $data1"#"$offset"#" [M0]",
[]>,
AtomicNoRet<noRetOp, 1> {
@@ -1027,7 +1028,7 @@ class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""
class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A <
op,
(outs),
- (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset),
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
asm#" $addr, $data0, $data1"#"$offset"#" [M0]",
[]>,
AtomicNoRet<noRetOp, 0> {
@@ -1039,7 +1040,7 @@ class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp =
class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A <
op,
(outs),
- (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset),
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
asm#" $addr, $data0"#"$offset"#" [M0]",
[]>,
AtomicNoRet<noRetOp, 0> {
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td
index bd91577a831..e1eb95580ac 100644
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -2614,7 +2614,7 @@ def : ROTRPattern <V_ALIGNBIT_B32>;
class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
(vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
- (inst (i1 0), $ptr, (as_i16imm $offset))
+ (inst (i1 0), $ptr, (as_i16imm $offset), (S_MOV_B32 -1))
>;
def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
@@ -2632,12 +2632,12 @@ def : DSReadPat <DS_READ_B64, v2i32, local_load_aligned8bytes>;
def : Pat <
(v2i32 (local_load (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
i8:$offset1))),
- (DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1)
+ (DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1, (S_MOV_B32 -1))
>;
class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset))
+ (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
>;
def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
@@ -2653,12 +2653,13 @@ def : Pat <
(local_store v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
i8:$offset1)),
(DS_WRITE2_B32 (i1 0), $ptr, (EXTRACT_SUBREG $value, sub0),
- (EXTRACT_SUBREG $value, sub1), $offset0, $offset1)
+ (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
+ (S_MOV_B32 -1))
>;
class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset))
+ (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
>;
// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
@@ -2674,13 +2675,13 @@ class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
class DSAtomicIncRetPat<DS inst, ValueType vt,
Instruction LoadImm, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
- (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
+ (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (S_MOV_B32 -1))
>;
class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
- (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
+ (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset), (S_MOV_B32 -1))
>;
diff --git a/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp b/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp
index 4140196e752..a092bcc2daf 100644
--- a/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/R600/SILoadStoreOptimizer.cpp
@@ -222,6 +222,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
// Be careful, since the addresses could be subregisters themselves in weird
// cases, like vectors of pointers.
const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
+ const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);
unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg();
unsigned DestReg1
@@ -262,6 +263,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
.addOperand(*AddrReg) // addr
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
+ .addOperand(*M0Reg) // M0
.addMemOperand(*I->memoperands_begin())
.addMemOperand(*Paired->memoperands_begin());
@@ -280,6 +282,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg());
LIS->shrinkToUses(&AddrRegLI);
+ LiveInterval &M0RegLI = LIS->getInterval(M0Reg->getReg());
+ LIS->shrinkToUses(&M0RegLI);
+
LIS->getInterval(DestReg); // Create new LI
DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
@@ -295,6 +300,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
// Be sure to use .addOperand(), and not .addReg() with these. We want to be
// sure we preserve the subregister index and any register flags set on them.
const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
+ const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);
const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0);
const MachineOperand *Data1
= TII->getNamedOperand(*Paired, AMDGPU::OpName::data0);
@@ -333,11 +339,13 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
.addOperand(*Data1) // data1
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
+ .addOperand(*M0Reg) // m0
.addMemOperand(*I->memoperands_begin())
.addMemOperand(*Paired->memoperands_begin());
// XXX - How do we express subregisters here?
- unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg() };
+ unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg(),
+ M0Reg->getReg()};
LIS->RemoveMachineInstrFromMaps(I);
LIS->RemoveMachineInstrFromMaps(Paired);
diff --git a/llvm/lib/Target/R600/SILowerControlFlow.cpp b/llvm/lib/Target/R600/SILowerControlFlow.cpp
index 9702565c462..20e8cecdd29 100644
--- a/llvm/lib/Target/R600/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/R600/SILowerControlFlow.cpp
@@ -88,7 +88,6 @@ private:
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
- void InitM0ForLDS(MachineBasicBlock::iterator MI);
void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
void IndirectSrc(MachineInstr &MI);
void IndirectDst(MachineInstr &MI);
@@ -325,14 +324,6 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
MI.eraseFromParent();
}
-/// The m0 register stores the maximum allowable address for LDS reads and
-/// writes. Its value must be at least the size in bytes of LDS allocated by
-/// the shader. For simplicity, we set it to the maximum possible value.
-void SILowerControlFlowPass::InitM0ForLDS(MachineBasicBlock::iterator MI) {
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
- AMDGPU::M0).addImm(0xffffffff);
-}
-
void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
MachineBasicBlock &MBB = *MI.getParent();
@@ -391,12 +382,6 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
.addReg(Save);
}
- // FIXME: Are there any values other than the LDS address clamp that need to
- // be stored in the m0 register and may be live for more than a few
- // instructions? If so, we should save the m0 register at the beginning
- // of this function and restore it here.
- // FIXME: Add support for LDS direct loads.
- InitM0ForLDS(&MI);
MI.eraseFromParent();
}
@@ -465,7 +450,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
if (TII->isDS(MI.getOpcode())) {
- NeedM0 = true;
NeedWQM = true;
}
@@ -544,13 +528,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
}
}
- if (NeedM0) {
- MachineBasicBlock &MBB = MF.front();
- // Initialize M0 to a value that won't cause LDS access to be discarded
- // due to offset clamping
- InitM0ForLDS(MBB.getFirstNonPHI());
- }
-
if (NeedWQM && MFI->getShaderType() == ShaderType::PIXEL) {
MachineBasicBlock &MBB = MF.front();
BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
diff --git a/llvm/test/CodeGen/R600/shl_add_ptr.ll b/llvm/test/CodeGen/R600/shl_add_ptr.ll
index 047cf252e78..fdb3d390883 100644
--- a/llvm/test/CodeGen/R600/shl_add_ptr.ll
+++ b/llvm/test/CodeGen/R600/shl_add_ptr.ll
@@ -68,7 +68,8 @@ define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)
; pointer can be used with an offset into the second one.
; SI-LABEL: {{^}}load_shl_base_lds_2:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: s_mov_b32 m0, -1
+; SI-NEXT: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9 [M0]
; SI: s_endpgm
define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
OpenPOWER on IntegriCloud