diff options
| author | Nicolai Haehnle <nhaehnle@gmail.com> | 2017-09-29 15:37:31 +0000 |
|---|---|---|
| committer | Nicolai Haehnle <nhaehnle@gmail.com> | 2017-09-29 15:37:31 +0000 |
| commit | ce4ddd06dad33ce272dce9784a0edd1d957ab208 (patch) | |
| tree | 3b82546c98ac13bbe87342ed7876aff4b4f10342 /llvm/lib | |
| parent | 8fb270c691631cca5a99cfb26c7b238581b10a49 (diff) | |
| download | bcm5719-llvm-ce4ddd06dad33ce272dce9784a0edd1d957ab208.tar.gz bcm5719-llvm-ce4ddd06dad33ce272dce9784a0edd1d957ab208.zip | |
AMDGPU: VALU carry-in and v_cndmask condition cannot be EXEC
The hardware will only forward EXEC_LO; the high 32 bits will be zero.
Additionally, inline constants do not work. At least,
v_addc_u32_e64 v0, vcc, v0, v1, -1
which could conceivably be used to combine (v0 + v1 + 1) into a single
instruction, acts as if all carry-in bits are zero.
The llvm.amdgcn.ps.live test is adjusted; it would be nice to combine
s_mov_b64 s[0:1], exec
v_cndmask_b32_e64 v0, v1, v2, s[0:1]
into
v_mov_b32 v0, v2
but it's not particularly high priority.
Fixes dEQP-GLES31.functional.shaders.helper_invocation.value.*
llvm-svn: 314522
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 23 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 2 |
5 files changed, 28 insertions, 11 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 6a751d71db2..3a125c2e7e4 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3012,15 +3012,18 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned SrcCondCopy = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); + BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy) + .addReg(SrcCond); BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo) .addReg(Src0, 0, AMDGPU::sub0) .addReg(Src1, 0, AMDGPU::sub0) - .addReg(SrcCond); + .addReg(SrcCondCopy); BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi) .addReg(Src0, 0, AMDGPU::sub1) .addReg(Src1, 0, AMDGPU::sub1) - .addReg(SrcCond); + .addReg(SrcCondCopy); BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), Dst) .addReg(DstLo) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index b2fbcce66d5..72d6119a6ba 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -649,15 +649,18 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, "Not a VGPR32 reg"); if (Cond.size() == 1) { + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); + BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) + .add(Cond[0]); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(FalseReg) .addReg(TrueReg) - .add(Cond[0]); + .addReg(SReg); } else if (Cond.size() == 2) { assert(Cond[0].isImm() && "Cond[0] is not an immediate"); switch (Cond[0].getImm()) { case SIInstrInfo::SCC_TRUE: { - unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); BuildMI(MBB, I, DL, 
get(AMDGPU::S_CSELECT_B64), SReg) .addImm(-1) .addImm(0); @@ -668,7 +671,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, break; } case SIInstrInfo::SCC_FALSE: { - unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) .addImm(0) .addImm(-1); @@ -681,23 +684,29 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, case SIInstrInfo::VCCNZ: { MachineOperand RegOp = Cond[1]; RegOp.setImplicit(false); + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); + BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) + .add(RegOp); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(FalseReg) .addReg(TrueReg) - .add(RegOp); + .addReg(SReg); break; } case SIInstrInfo::VCCZ: { MachineOperand RegOp = Cond[1]; RegOp.setImplicit(false); + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); + BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) + .add(RegOp); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(TrueReg) .addReg(FalseReg) - .add(RegOp); + .addReg(SReg); break; } case SIInstrInfo::EXECNZ: { - unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2) .addImm(0); @@ -711,7 +720,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, break; } case SIInstrInfo::EXECZ: { - unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2) .addImm(0); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index e3bed5eb3db..c0a844e255c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -994,7 +994,7 @@ class getVOP3SrcForVT<ValueType VT> { VCSrc_f64, VCSrc_b64), !if(!eq(VT.Value, i1.Value), - SCSrc_b64, + SCSrc_i1, !if(isFP, !if(!eq(VT.Value, f16.Value), VCSrc_f16, diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp index ba616ada0c9..3880d052bf8 100644 --- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -121,11 +121,14 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) { } } + unsigned int TmpSrc = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::COPY), TmpSrc) + .add(Src); BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64)) .add(Dst) .addImm(0) .addImm(-1) - .add(Src); + .addReg(TmpSrc); MI.eraseFromParent(); } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) && SrcRC == &AMDGPU::VReg_1RegClass) { diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index d685326c9b5..5062a626d94 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -483,6 +483,8 @@ defm SSrc : RegImmOperand<"SReg", "SSrc">; defm SCSrc : RegInlineOperand<"SReg", "SCSrc"> ; +def SCSrc_i1 : RegisterOperand<SReg_64_XEXEC>; + //===----------------------------------------------------------------------===// // VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate //===----------------------------------------------------------------------===// |

