summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp23
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td2
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.td2
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll5
-rw-r--r--llvm/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir34
-rw-r--r--llvm/test/CodeGen/AMDGPU/shrink-carry.mir16
-rw-r--r--llvm/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir8
9 files changed, 61 insertions, 41 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 6a751d71db2..3a125c2e7e4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3012,15 +3012,18 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned SrcCondCopy = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy)
+ .addReg(SrcCond);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
.addReg(Src0, 0, AMDGPU::sub0)
.addReg(Src1, 0, AMDGPU::sub0)
- .addReg(SrcCond);
+ .addReg(SrcCondCopy);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
.addReg(Src0, 0, AMDGPU::sub1)
.addReg(Src1, 0, AMDGPU::sub1)
- .addReg(SrcCond);
+ .addReg(SrcCondCopy);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), Dst)
.addReg(DstLo)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index b2fbcce66d5..72d6119a6ba 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -649,15 +649,18 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
"Not a VGPR32 reg");
if (Cond.size() == 1) {
+ unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
+ .add(Cond[0]);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addReg(FalseReg)
.addReg(TrueReg)
- .add(Cond[0]);
+ .addReg(SReg);
} else if (Cond.size() == 2) {
assert(Cond[0].isImm() && "Cond[0] is not an immediate");
switch (Cond[0].getImm()) {
case SIInstrInfo::SCC_TRUE: {
- unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
.addImm(-1)
.addImm(0);
@@ -668,7 +671,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
break;
}
case SIInstrInfo::SCC_FALSE: {
- unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
.addImm(0)
.addImm(-1);
@@ -681,23 +684,29 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
case SIInstrInfo::VCCNZ: {
MachineOperand RegOp = Cond[1];
RegOp.setImplicit(false);
+ unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
+ .add(RegOp);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addReg(FalseReg)
.addReg(TrueReg)
- .add(RegOp);
+ .addReg(SReg);
break;
}
case SIInstrInfo::VCCZ: {
MachineOperand RegOp = Cond[1];
RegOp.setImplicit(false);
+ unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
+ .add(RegOp);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addReg(TrueReg)
.addReg(FalseReg)
- .add(RegOp);
+ .addReg(SReg);
break;
}
case SIInstrInfo::EXECNZ: {
- unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
.addImm(0);
@@ -711,7 +720,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
break;
}
case SIInstrInfo::EXECZ: {
- unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
.addImm(0);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index e3bed5eb3db..c0a844e255c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -994,7 +994,7 @@ class getVOP3SrcForVT<ValueType VT> {
VCSrc_f64,
VCSrc_b64),
!if(!eq(VT.Value, i1.Value),
- SCSrc_b64,
+ SCSrc_i1,
!if(isFP,
!if(!eq(VT.Value, f16.Value),
VCSrc_f16,
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index ba616ada0c9..3880d052bf8 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -121,11 +121,14 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
}
}
+ unsigned int TmpSrc = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::COPY), TmpSrc)
+ .add(Src);
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64))
.add(Dst)
.addImm(0)
.addImm(-1)
- .add(Src);
+ .addReg(TmpSrc);
MI.eraseFromParent();
} else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
SrcRC == &AMDGPU::VReg_1RegClass) {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index d685326c9b5..5062a626d94 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -483,6 +483,8 @@ defm SSrc : RegImmOperand<"SReg", "SSrc">;
defm SCSrc : RegInlineOperand<"SReg", "SCSrc"> ;
+def SCSrc_i1 : RegisterOperand<SReg_64_XEXEC>;
+
//===----------------------------------------------------------------------===//
// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
index 1c3cba8d3e4..3061bd91c9c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
@@ -1,7 +1,10 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
; CHECK-LABEL: {{^}}test1:
-; CHECK: v_cndmask_b32_e64 v0, 0, 1, exec
+; CHECK: s_mov_b64 s[0:1], exec
+; CHECK: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+;
+; Note: The hardware doesn't implement EXEC as src2 for v_cndmask.
;
; Note: We could generate better code here if we recognized earlier that
; there is no WQM use and therefore llvm.amdgcn.ps.live is constant. However,
diff --git a/llvm/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir b/llvm/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir
index 2a431fe7946..dff9024df62 100644
--- a/llvm/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir
+++ b/llvm/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir
@@ -9,9 +9,9 @@ registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- - { id: 3, class: sreg_64 }
+ - { id: 3, class: sreg_64_xexec }
- { id: 4, class: vgpr_32 }
- - { id: 5, class: sreg_64 }
+ - { id: 5, class: sreg_64_xexec }
- { id: 6, class: vgpr_32 }
- { id: 7, class: vgpr_32 }
@@ -42,13 +42,13 @@ registers:
- { id: 6, class: vgpr_32 }
- { id: 7, class: vgpr_32 }
- { id: 8, class: vgpr_32 }
- - { id: 9, class: sreg_64 }
+ - { id: 9, class: sreg_64_xexec }
- { id: 10, class: vgpr_32 }
- - { id: 11, class: sreg_64 }
+ - { id: 11, class: sreg_64_xexec }
- { id: 12, class: vgpr_32 }
- - { id: 13, class: sreg_64 }
+ - { id: 13, class: sreg_64_xexec }
- { id: 14, class: vgpr_32 }
- - { id: 15, class: sreg_64 }
+ - { id: 15, class: sreg_64_xexec }
body: |
bb.0:
@@ -77,9 +77,9 @@ name: cluster_mov_addc
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- - { id: 2, class: sreg_64 }
+ - { id: 2, class: sreg_64_xexec }
- { id: 3, class: vgpr_32 }
- - { id: 4, class: sreg_64 }
+ - { id: 4, class: sreg_64_xexec }
- { id: 6, class: vgpr_32 }
- { id: 7, class: vgpr_32 }
@@ -104,12 +104,12 @@ registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- - { id: 3, class: sreg_64 }
+ - { id: 3, class: sreg_64_xexec }
- { id: 4, class: vgpr_32 }
- - { id: 5, class: sreg_64 }
+ - { id: 5, class: sreg_64_xexec }
- { id: 6, class: vgpr_32 }
- { id: 7, class: vgpr_32 }
- - { id: 8, class: sreg_64 }
+ - { id: 8, class: sreg_64_xexec }
body: |
bb.0:
%0 = V_MOV_B32_e32 0, implicit %exec
@@ -130,9 +130,9 @@ registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- - { id: 3, class: sreg_64 }
+ - { id: 3, class: sreg_64_xexec }
- { id: 4, class: vgpr_32 }
- - { id: 5, class: sreg_64 }
+ - { id: 5, class: sreg_64_xexec }
- { id: 6, class: vgpr_32 }
- { id: 7, class: vgpr_32 }
@@ -156,9 +156,9 @@ registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- - { id: 3, class: sreg_64 }
+ - { id: 3, class: sreg_64_xexec }
- { id: 4, class: vgpr_32 }
- - { id: 5, class: sreg_64 }
+ - { id: 5, class: sreg_64_xexec }
- { id: 6, class: vgpr_32 }
- { id: 7, class: vgpr_32 }
@@ -181,7 +181,7 @@ registers:
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_32 }
- - { id: 4, class: sreg_64 }
+ - { id: 4, class: sreg_64_xexec }
- { id: 5, class: vgpr_32 }
- { id: 6, class: vgpr_32 }
- { id: 7, class: vgpr_32 }
@@ -210,7 +210,7 @@ registers:
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_32 }
- - { id: 4, class: sreg_64 }
+ - { id: 4, class: sreg_64_xexec }
- { id: 5, class: vgpr_32 }
- { id: 6, class: vgpr_32 }
- { id: 7, class: vgpr_32 }
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-carry.mir b/llvm/test/CodeGen/AMDGPU/shrink-carry.mir
index d5d6223cc06..cf000ffa774 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-carry.mir
+++ b/llvm/test/CodeGen/AMDGPU/shrink-carry.mir
@@ -10,9 +10,9 @@ registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- - { id: 3, class: sreg_64 }
+ - { id: 3, class: sreg_64_xexec }
- { id: 4, class: vgpr_32 }
- - { id: 5, class: sreg_64 }
+ - { id: 5, class: sreg_64_xexec }
body: |
bb.0:
@@ -34,9 +34,9 @@ registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- - { id: 3, class: sreg_64 }
+ - { id: 3, class: sreg_64_xexec }
- { id: 4, class: vgpr_32 }
- - { id: 5, class: sreg_64 }
+ - { id: 5, class: sreg_64_xexec }
body: |
bb.0:
@@ -58,9 +58,9 @@ registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- - { id: 3, class: sreg_64 }
+ - { id: 3, class: sreg_64_xexec }
- { id: 4, class: vgpr_32 }
- - { id: 5, class: sreg_64 }
+ - { id: 5, class: sreg_64_xexec }
body: |
bb.0:
@@ -82,9 +82,9 @@ registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- - { id: 3, class: sreg_64 }
+ - { id: 3, class: sreg_64_xexec }
- { id: 4, class: vgpr_32 }
- - { id: 5, class: sreg_64 }
+ - { id: 5, class: sreg_64_xexec }
body: |
bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir b/llvm/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
index 767118eb8d1..b1fdc5f8045 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
+++ b/llvm/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
@@ -27,7 +27,7 @@ registers:
- { id: 6, class: sreg_32 }
- { id: 7, class: sreg_32 }
- { id: 8, class: sreg_32_xm0 }
- - { id: 9, class: sreg_64 }
+ - { id: 9, class: sreg_64_xexec }
- { id: 10, class: sreg_32_xm0 }
- { id: 11, class: sreg_32_xm0 }
- { id: 12, class: sgpr_64 }
@@ -111,7 +111,7 @@ registers:
- { id: 6, class: sreg_32 }
- { id: 7, class: sreg_32 }
- { id: 8, class: sreg_32_xm0 }
- - { id: 9, class: sreg_64 }
+ - { id: 9, class: sreg_64_xexec }
- { id: 10, class: sreg_32_xm0 }
- { id: 11, class: sreg_32_xm0 }
- { id: 12, class: sgpr_64 }
@@ -195,7 +195,7 @@ registers:
- { id: 6, class: sreg_32 }
- { id: 7, class: sreg_32 }
- { id: 8, class: sreg_32_xm0 }
- - { id: 9, class: sreg_64 }
+ - { id: 9, class: sreg_64_xexec }
- { id: 10, class: sreg_32_xm0 }
- { id: 11, class: sreg_32_xm0 }
- { id: 12, class: sgpr_64 }
@@ -278,7 +278,7 @@ registers:
- { id: 6, class: sreg_32 }
- { id: 7, class: sreg_32 }
- { id: 8, class: sreg_32_xm0 }
- - { id: 9, class: sreg_64 }
+ - { id: 9, class: sreg_64_xexec }
- { id: 10, class: sreg_32_xm0 }
- { id: 11, class: sreg_32_xm0 }
- { id: 12, class: sgpr_64 }
OpenPOWER on IntegriCloud