summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-08-27 01:00:37 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-08-27 01:00:37 +0000
commit22e417956d67efb602a756eb95ab61e48482a3f6 (patch)
tree74758416de5ce33c8222dceabce92589cd341b53 /llvm
parente9497444745e727ccd1560d7b0ebf45feb606918 (diff)
downloadbcm5719-llvm-22e417956d67efb602a756eb95ab61e48482a3f6.tar.gz
bcm5719-llvm-22e417956d67efb602a756eb95ab61e48482a3f6.zip
AMDGPU: Move cndmask pseudo to be isel pseudo
There's only one use of this for the convenience of a pattern. I think v_mov_b64_pseudo should also be moved, but SIFoldOperands does currently make use of it. llvm-svn: 279901
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp30
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp23
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td1
-rw-r--r--llvm/test/CodeGen/AMDGPU/ffloor.f64.ll18
-rw-r--r--llvm/test/CodeGen/AMDGPU/fract.f64.ll18
5 files changed, 49 insertions, 41 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1e31c74f179..28d9322a471 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1453,6 +1453,36 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
return emitIndirectDst(MI, *BB, getSubtarget()->getInstrInfo());
case AMDGPU::SI_KILL:
return splitKillBlock(MI, BB);
+ case AMDGPU::V_CNDMASK_B64_PSEUDO: {
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Src0 = MI.getOperand(1).getReg();
+ unsigned Src1 = MI.getOperand(2).getReg();
+ const DebugLoc &DL = MI.getDebugLoc();
+ unsigned SrcCond = MI.getOperand(3).getReg();
+
+ unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
+ .addReg(Src0, 0, AMDGPU::sub0)
+ .addReg(Src1, 0, AMDGPU::sub0)
+ .addReg(SrcCond);
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
+ .addReg(Src0, 0, AMDGPU::sub1)
+ .addReg(Src1, 0, AMDGPU::sub1)
+ .addReg(SrcCond);
+
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), Dst)
+ .addReg(DstLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(DstHi)
+ .addImm(AMDGPU::sub1);
+ MI.eraseFromParent();
+ return BB;
+ }
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 433c0f27e25..2e7ca45c9b3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -880,29 +880,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.eraseFromParent();
break;
}
-
- case AMDGPU::V_CNDMASK_B64_PSEUDO: {
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
- unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
- unsigned Src0 = MI.getOperand(1).getReg();
- unsigned Src1 = MI.getOperand(2).getReg();
- const MachineOperand &SrcCond = MI.getOperand(3);
-
- BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
- .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
- .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
- .addReg(SrcCond.getReg())
- .addReg(Dst, RegState::Implicit | RegState::Define);
- BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
- .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
- .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
- .addReg(SrcCond.getReg(), getKillRegState(SrcCond.isKill()))
- .addReg(Dst, RegState::Implicit | RegState::Define);
- MI.eraseFromParent();
- break;
- }
-
case AMDGPU::SI_PC_ADD_REL_OFFSET: {
MachineFunction &MF = *MBB.getParent();
unsigned Reg = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index ad45df0d902..e02e2aea1bd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1781,6 +1781,7 @@ def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst),
(ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", []> {
let isPseudo = 1;
let isCodeGenOnly = 1;
+ let usesCustomInserter = 1;
}
// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
diff --git a/llvm/test/CodeGen/AMDGPU/ffloor.f64.ll b/llvm/test/CodeGen/AMDGPU/ffloor.f64.ll
index ea708a2b7bb..904dfe31b15 100644
--- a/llvm/test/CodeGen/AMDGPU/ffloor.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/ffloor.f64.ll
@@ -14,9 +14,9 @@ declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
; CI: v_floor_f64_e32
; SI: v_fract_f64_e32
; SI-DAG: v_min_f64
-; SI-DAG: v_cmp_class_f64_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
+; SI-DAG: v_cmp_class_f64_e64 vcc
+; SI: v_cndmask_b32_e32
+; SI: v_cndmask_b32_e32
; SI: v_add_f64
; SI: s_endpgm
define void @ffloor_f64(double addrspace(1)* %out, double %x) {
@@ -29,9 +29,9 @@ define void @ffloor_f64(double addrspace(1)* %out, double %x) {
; CI: v_floor_f64_e64
; SI: v_fract_f64_e64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT:s[[0-9]+:[0-9]+]]]
; SI-DAG: v_min_f64
-; SI-DAG: v_cmp_class_f64_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
+; SI-DAG: v_cmp_class_f64_e64 vcc
+; SI: v_cndmask_b32_e32
+; SI: v_cndmask_b32_e32
; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT]]
; SI: s_endpgm
define void @ffloor_f64_neg(double addrspace(1)* %out, double %x) {
@@ -45,9 +45,9 @@ define void @ffloor_f64_neg(double addrspace(1)* %out, double %x) {
; CI: v_floor_f64_e64
; SI: v_fract_f64_e64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT:s[[0-9]+:[0-9]+]]]|
; SI-DAG: v_min_f64
-; SI-DAG: v_cmp_class_f64_e64
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
+; SI-DAG: v_cmp_class_f64_e64 vcc
+; SI: v_cndmask_b32_e32
+; SI: v_cndmask_b32_e32
; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT]]|
; SI: s_endpgm
define void @ffloor_f64_neg_abs(double addrspace(1)* %out, double %x) {
diff --git a/llvm/test/CodeGen/AMDGPU/fract.f64.ll b/llvm/test/CodeGen/AMDGPU/fract.f64.ll
index 68b884363ec..29b90a43ab6 100644
--- a/llvm/test/CodeGen/AMDGPU/fract.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract.f64.ll
@@ -13,9 +13,9 @@ declare double @llvm.floor.f64(double) #0
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
-; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
-; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
-; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
+; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3
+; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc
+; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc
; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]{{\]}}, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]{{\]}}, -[[SUB0]]
@@ -40,9 +40,9 @@ define void @fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) #1
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
-; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
-; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
-; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
+; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3
+; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc
+; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc
; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO]]:[[HI]]{{\]}}, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO]]:[[HI]]{{\]}}, -[[SUB0]]
@@ -68,9 +68,9 @@ define void @fract_f64_neg(double addrspace(1)* %out, double addrspace(1)* %src)
; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
-; SI-DAG: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
-; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], [[COND]]
-; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], [[COND]]
+; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3
+; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc
+; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc
; SI: v_add_f64 [[SUB0:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO]]:[[HI]]{{\]}}|, -v{{\[}}[[RESLO]]:[[RESHI]]{{\]}}
; SI: v_add_f64 [[FRACT:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO]]:[[HI]]{{\]}}|, -[[SUB0]]
OpenPOWER on IntegriCloud