author     Matt Arsenault <Matthew.Arsenault@amd.com>   2014-12-03 05:22:35 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>   2014-12-03 05:22:35 +0000
commit     becd656c7caf68d443dc721ae01590d200beed9c
tree       e2da004150f309650b5175f7cae74a41a00bfba0 /llvm/lib
parent     2f470c62cba90ca47f3aead9b0344c4c183dd6be
R600/SI: Remove i1 pseudo VALU ops
Select i1 logical ops directly to 64-bit SALU instructions.
Vector i1 values really live in SGPRs, with one bit per
work-item in the wave. This saves about 4 instructions
when and/or/xor-ing any condition, and it also makes it
easier to write conditions that need to be passed in vcc.
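
To picture why a single 64-bit scalar instruction implements a whole
vector of i1 values, note that each of the 64 lanes in a wave owns one
bit of an SGPR pair, exactly as in the exec mask and vcc. Below is a
minimal host-side C++ model of that representation (illustrative only,
not part of this commit; all names are invented):

  #include <cassert>
  #include <cstdint>

  // One bit per lane: bit N of the mask holds the i1 value of
  // work-item N in a 64-wide wave, the same layout as vcc/exec.
  using WaveMask = uint64_t;

  // What a single s_and_b64/s_or_b64/s_xor_b64 computes: the per-lane
  // logical op for all 64 lanes at once, with no per-lane VALU work.
  WaveMask waveAnd(WaveMask a, WaveMask b) { return a & b; }
  WaveMask waveOr(WaveMask a, WaveMask b)  { return a | b; }
  WaveMask waveXor(WaveMask a, WaveMask b) { return a ^ b; }

  int main() {
    WaveMask cmp0 = 0x00000000FFFFFFFFull; // lanes 0-31 passed a compare
    WaveMask cmp1 = 0x0000FFFF0000FFFFull; // lanes 0-15, 32-47 passed
    // Lane 5's combined condition is just bit 5 of the ANDed masks.
    assert(((waveAnd(cmp0, cmp1) >> 5) & 1) == 1);
    // Lane 40 fails the first compare, so the AND clears its bit.
    assert(((waveAnd(cmp0, cmp1) >> 40) & 1) == 0);
    return 0;
  }

The result of such a mask op can be copied straight into vcc and
consumed by v_cndmask_b32, which is where the instruction savings
come from.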
This should work correctly now that the SGPR live range
fixing pass works. More work is needed to eliminate the VReg_1
pseudo regclass and possibly the entire SILowerI1Copies pass.
llvm-svn: 223206
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/R600/SIInstrInfo.td      |  4
-rw-r--r--  llvm/lib/Target/R600/SIInstructions.td   | 53
-rw-r--r--  llvm/lib/Target/R600/SILowerI1Copies.cpp | 76
3 files changed, 70 insertions, 63 deletions
diff --git a/llvm/lib/Target/R600/SIInstrInfo.td b/llvm/lib/Target/R600/SIInstrInfo.td
index cdbc22e0ead..4b3be5be578 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.td
+++ b/llvm/lib/Target/R600/SIInstrInfo.td
@@ -131,6 +131,10 @@ def as_i32imm: SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i32);
 }]>;
 
+def as_i64imm: SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i64);
+}]>;
+
 def IMM8bit : PatLeaf <(imm),
   [{return isUInt<8>(N->getZExtValue());}]
 >;
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td
index 00ce9bfcc26..cfe6c81ced9 100644
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -1686,30 +1686,8 @@ defm V_TRIG_PREOP_F64 : VOP3Inst <
 //===----------------------------------------------------------------------===//
 // Pseudo Instructions
 //===----------------------------------------------------------------------===//
-
 let isCodeGenOnly = 1, isPseudo = 1 in {
 
-def V_MOV_I1 : InstSI <
-  (outs VReg_1:$dst),
-  (ins i1imm:$src),
-  "", [(set i1:$dst, (imm:$src))]
->;
-
-def V_AND_I1 : InstSI <
-  (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "",
-  [(set i1:$dst, (and i1:$src0, i1:$src1))]
->;
-
-def V_OR_I1 : InstSI <
-  (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "",
-  [(set i1:$dst, (or i1:$src0, i1:$src1))]
->;
-
-def V_XOR_I1 : InstSI <
-  (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "",
-  [(set i1:$dst, (xor i1:$src0, i1:$src1))]
->;
-
 let hasSideEffects = 1 in {
 def SGPR_USE : InstSI <(outs),(ins), "", []>;
 }
@@ -2495,6 +2473,14 @@ def : Pat <
   (S_MOV_B64 InlineImm<i64>:$imm)
 >;
 
+// XXX - Should this use a s_cmp to set SCC?
+
+// Set to sign-extended 64-bit value (true = -1, false = 0)
+def : Pat <
+  (i1 imm:$imm),
+  (S_MOV_B64 (i64 (as_i64imm $imm)))
+>;
+
 /********** ===================== **********/
 /********** Interpolation Paterns **********/
 /********** ===================== **********/
@@ -3045,6 +3031,27 @@ def : Pat <
                     (V_CNDMASK_B32_e64 0, -1, $src), sub1)
 >;
 
+// If we need to perform a logical operation on i1 values, we need to
+// use vector comparisons since there is only one SCC register. Vector
+// comparisions still write to a pair of SGPRs, so treat these as
+// 64-bit comparisons. When legalizing SGPR copies, instructions
+// resulting in the copies from SCC to these instructions will be
+// moved to the VALU.
+def : Pat <
+  (i1 (and i1:$src0, i1:$src1)),
+  (S_AND_B64 $src0, $src1)
+>;
+
+def : Pat <
+  (i1 (or i1:$src0, i1:$src1)),
+  (S_OR_B64 $src0, $src1)
+>;
+
+def : Pat <
+  (i1 (xor i1:$src0, i1:$src1)),
+  (S_XOR_B64 $src0, $src1)
+>;
+
 def : Pat <
   (f32 (sint_to_fp i1:$src)),
   (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_NEG_ONE, $src)
@@ -3057,7 +3064,7 @@ def : Pat <
 
 def : Pat <
   (f64 (sint_to_fp i1:$src)),
-    (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
+  (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
 >;
 
 def : Pat <
diff --git a/llvm/lib/Target/R600/SILowerI1Copies.cpp b/llvm/lib/Target/R600/SILowerI1Copies.cpp
index 226a672b343..7767c4c0671 100644
--- a/llvm/lib/Target/R600/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/R600/SILowerI1Copies.cpp
@@ -85,30 +85,6 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
       Next = std::next(I);
       MachineInstr &MI = *I;
 
-      if (MI.getOpcode() == AMDGPU::V_MOV_I1) {
-        I1Defs.push_back(MI.getOperand(0).getReg());
-        MI.setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
-        continue;
-      }
-
-      if (MI.getOpcode() == AMDGPU::V_AND_I1) {
-        I1Defs.push_back(MI.getOperand(0).getReg());
-        MI.setDesc(TII->get(AMDGPU::V_AND_B32_e64));
-        continue;
-      }
-
-      if (MI.getOpcode() == AMDGPU::V_OR_I1) {
-        I1Defs.push_back(MI.getOperand(0).getReg());
-        MI.setDesc(TII->get(AMDGPU::V_OR_B32_e64));
-        continue;
-      }
-
-      if (MI.getOpcode() == AMDGPU::V_XOR_I1) {
-        I1Defs.push_back(MI.getOperand(0).getReg());
-        MI.setDesc(TII->get(AMDGPU::V_XOR_B32_e64));
-        continue;
-      }
-
       if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF) {
         unsigned Reg = MI.getOperand(0).getReg();
         const TargetRegisterClass *RC = MRI.getRegClass(Reg);
@@ -117,32 +93,52 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
         continue;
       }
 
-      if (MI.getOpcode() != AMDGPU::COPY ||
-          !TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()) ||
-          !TargetRegisterInfo::isVirtualRegister(MI.getOperand(1).getReg()))
+      if (MI.getOpcode() != AMDGPU::COPY)
         continue;
 
+      const MachineOperand &Dst = MI.getOperand(0);
+      const MachineOperand &Src = MI.getOperand(1);
+
+      if (!TargetRegisterInfo::isVirtualRegister(Src.getReg()) ||
+          !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
+        continue;
-      const TargetRegisterClass *DstRC =
-          MRI.getRegClass(MI.getOperand(0).getReg());
-      const TargetRegisterClass *SrcRC =
-          MRI.getRegClass(MI.getOperand(1).getReg());
+      const TargetRegisterClass *DstRC = MRI.getRegClass(Dst.getReg());
+      const TargetRegisterClass *SrcRC = MRI.getRegClass(Src.getReg());
 
       if (DstRC == &AMDGPU::VReg_1RegClass &&
           TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
-        I1Defs.push_back(MI.getOperand(0).getReg());
-        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CNDMASK_B32_e64))
-          .addOperand(MI.getOperand(0))
-          .addImm(0)
-          .addImm(-1)
-          .addOperand(MI.getOperand(1));
+        I1Defs.push_back(Dst.getReg());
+        DebugLoc DL = MI.getDebugLoc();
+
+        MachineInstr *DefInst = MRI.getUniqueVRegDef(Src.getReg());
+        if (DefInst->getOpcode() == AMDGPU::S_MOV_B64) {
+          if (DefInst->getOperand(1).isImm()) {
+            I1Defs.push_back(Dst.getReg());
+
+            int64_t Val = DefInst->getOperand(1).getImm();
+            assert(Val == 0 || Val == -1);
+
+            BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_MOV_B32_e32))
+              .addOperand(Dst)
+              .addImm(Val);
+            MI.eraseFromParent();
+            continue;
+          }
+        }
+
+        BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64))
+          .addOperand(Dst)
+          .addImm(0)
+          .addImm(-1)
+          .addOperand(Src);
         MI.eraseFromParent();
       } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
                  SrcRC == &AMDGPU::VReg_1RegClass) {
         BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
-          .addOperand(MI.getOperand(0))
-          .addOperand(MI.getOperand(1))
-          .addImm(0);
+          .addOperand(Dst)
+          .addOperand(Src)
+          .addImm(0);
         MI.eraseFromParent();
       }
     }
 
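
The control flow added to SILowerI1Copies above reduces to a small
decision: if the 64-bit scalar source of a copy into VReg_1 is known to
be an s_mov_b64 of an immediate (which, given the new i1-immediate
pattern, can only be 0 or -1), a plain v_mov_b32 of that value
suffices; otherwise the pass selects 0 or -1 per lane with
v_cndmask_b32. A simplified standalone sketch of just that decision
(types and names invented for illustration; the real pass operates on
MachineInstrs):

  #include <cassert>
  #include <cstdint>
  #include <optional>

  // Which VALU lowering a VReg_1 copy receives in this simplified model.
  enum class Lowering { MovImm, CndMask };

  // knownSrcImm is the immediate behind the source's s_mov_b64, if the
  // defining instruction could be identified as one.
  Lowering lowerScalarCondToVReg1(std::optional<int64_t> knownSrcImm) {
    if (knownSrcImm) {
      // i1 true/false were materialized as all-ones/all-zeros masks.
      assert(*knownSrcImm == 0 || *knownSrcImm == -1);
      return Lowering::MovImm;   // v_mov_b32 dst, imm
    }
    return Lowering::CndMask;    // v_cndmask_b32 dst, 0, -1, src
  }

  int main() {
    assert(lowerScalarCondToVReg1(int64_t{-1}) == Lowering::MovImm);
    assert(lowerScalarCondToVReg1(std::nullopt) == Lowering::CndMask);
    return 0;
  }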