| field     | value                                      | date                      |
|-----------|--------------------------------------------|---------------------------|
| author    | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-08-15 18:42:22 +0000 |
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-08-15 18:42:22 +0000 |
| commit    | fabf545299698eded064b29fe8446ba81051f284   |                           |
| tree      | d8c3e591ee4a925ead8d2ed466e60b52ef8f40e9   |                           |
| parent    | 13623d0e2877d9273f26a0ba9d059224d6f4e3f1   |                           |
R600/SI: Move all fabs / fneg handling to patterns
llvm-svn: 215749
Diffstat (limited to 'llvm/lib')
| mode       | file                                    | lines changed |
|------------|-----------------------------------------|---------------|
| -rw-r--r-- | llvm/lib/Target/R600/SIISelLowering.cpp | 89            |
| -rw-r--r-- | llvm/lib/Target/R600/SIInstructions.td  | 59            |

2 files changed, 31 insertions, 117 deletions
diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp
index 1d5b43f5954..09e319a3a5d 100644
--- a/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/llvm/lib/Target/R600/SIISelLowering.cpp
@@ -644,95 +644,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
     MI->eraseFromParent();
     break;
   }
-  case AMDGPU::FABS_SI: {
-    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
-    const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
-        getTargetMachine().getSubtargetImpl()->getInstrInfo());
-    DebugLoc DL = MI->getDebugLoc();
-    unsigned DestReg = MI->getOperand(0).getReg();
-    unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-
-    BuildMI(*BB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), Reg)
-      .addImm(0x7fffffff);
-    BuildMI(*BB, I, DL, TII->get(AMDGPU::V_AND_B32_e32), DestReg)
-      .addReg(MI->getOperand(1).getReg())
-      .addReg(Reg);
-    MI->eraseFromParent();
-    break;
-  }
-  case AMDGPU::FABS64_SI: {
-    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
-    const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
-        getTargetMachine().getSubtargetImpl()->getInstrInfo());
-
-    DebugLoc DL = MI->getDebugLoc();
-    unsigned SuperReg = MI->getOperand(0).getReg();
-    unsigned SrcReg = MI->getOperand(1).getReg();
-
-    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-
-    // Copy the subregister to make sure it is the right register class.
-    unsigned VReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-    BuildMI(*BB, I, DL, TII->get(AMDGPU::COPY), VReg)
-      .addReg(SrcReg, 0, AMDGPU::sub1);
-
-    // We only need to mask the upper half of the register pair.
-    BuildMI(*BB, I, DL, TII->get(AMDGPU::V_AND_B32_e32), TmpReg)
-      .addImm(0x7fffffff)
-      .addReg(VReg);
-
-    BuildMI(*BB, I, DL, TII->get(AMDGPU::REG_SEQUENCE), SuperReg)
-      .addReg(SrcReg, 0, AMDGPU::sub0)
-      .addImm(AMDGPU::sub0)
-      .addReg(TmpReg)
-      .addImm(AMDGPU::sub1);
-    MI->eraseFromParent();
-    break;
-  }
-  case AMDGPU::FNEG_SI: {
-    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
-    const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
-        getTargetMachine().getSubtargetImpl()->getInstrInfo());
-    DebugLoc DL = MI->getDebugLoc();
-    unsigned DestReg = MI->getOperand(0).getReg();
-    unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-
-    // FIXME: Should use SALU instructions
-    BuildMI(*BB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), Reg)
-      .addImm(0x80000000);
-    BuildMI(*BB, I, DL, TII->get(AMDGPU::V_XOR_B32_e32), DestReg)
-      .addReg(MI->getOperand(1).getReg())
-      .addReg(Reg);
-    MI->eraseFromParent();
-    break;
-  }
-  case AMDGPU::FNEG64_SI: {
-    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
-    const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
-        getTargetMachine().getSubtargetImpl()->getInstrInfo());
-
-    DebugLoc DL = MI->getDebugLoc();
-    unsigned SrcReg = MI->getOperand(1).getReg();
-    unsigned DestReg = MI->getOperand(0).getReg();
-
-    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-    unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-
-    // FIXME: Should use SALU instructions
-    BuildMI(*BB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), ImmReg)
-      .addImm(0x80000000);
-    BuildMI(*BB, I, DL, TII->get(AMDGPU::V_XOR_B32_e32), TmpReg)
-      .addReg(SrcReg, 0, AMDGPU::sub1)
-      .addReg(ImmReg);
-
-    BuildMI(*BB, I, DL, TII->get(AMDGPU::REG_SEQUENCE), DestReg)
-      .addReg(SrcReg, 0, AMDGPU::sub0)
-      .addImm(AMDGPU::sub0)
-      .addReg(TmpReg)
-      .addImm(AMDGPU::sub1);
-    MI->eraseFromParent();
-    break;
-  }
   case AMDGPU::FCLAMP_SI: {
     const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
         getTargetMachine().getSubtargetImpl()->getInstrInfo());
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td
index 8d2c212dc15..fd011a16067 100644
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -2322,48 +2322,51 @@ def : Pat <
 /********** Floating point absolute/negative **********/
 /********** ================================ **********/
 
-// Manipulate the sign bit directly, as e.g. using the source negation modifier
-// in V_ADD_F32_e64 $src, 0, [...] does not result in -0.0 for $src == +0.0,
-// breaking the piglit *s-floatBitsToInt-neg* tests
+// Prevent expanding both fneg and fabs.
 
-// TODO: Look into not implementing isFNegFree/isFAbsFree for SI, and possibly
-// removing these patterns
+// FIXME: Should use S_OR_B32
 def : Pat <
   (fneg (fabs f32:$src)),
   (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
 >;
 
+// FIXME: Should use S_OR_B32
 def : Pat <
   (fneg (fabs f64:$src)),
   (f64 (INSERT_SUBREG
    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
      (i32 (EXTRACT_SUBREG f64:$src, sub0)), sub0),
-    (V_OR_B32_e32 (S_MOV_B32 0x80000000),
-                  (EXTRACT_SUBREG f64:$src, sub1)), sub1)) // Set sign bit.
->;
-
-class SIUnaryCustomInsertInst<string name, SDPatternOperator node,
-                              ValueType vt,
-                              RegisterClass dstrc,
-                              RegisterClass srcrc> :
-  AMDGPUShaderInst<
-    (outs dstrc:$dst),
-    (ins srcrc:$src0),
-    name#" $dst, $src0",
-    [(set vt:$dst, (node vt:$src0))]> {
-  let usesCustomInserter = 1;
-}
+    (V_OR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
+                  (V_MOV_B32_e32 0x80000000)), sub1)) // Set sign bit.
+>;
 
-def FABS_SI : SIUnaryCustomInsertInst<"FABS_SI", fabs,
-                                      f32, VReg_32, VSrc_32>;
-def FNEG_SI : SIUnaryCustomInsertInst<"FNEG_SI", fneg,
-                                      f32, VReg_32, VSrc_32>;
+def : Pat <
+  (fabs f32:$src),
+  (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff))
+>;
 
-def FABS64_SI : SIUnaryCustomInsertInst<"FABS64_SI", fabs,
-                                        f64, VReg_64, VSrc_64>;
-def FNEG64_SI : SIUnaryCustomInsertInst<"FNEG64_SI", fneg,
-                                        f64, VReg_64, VSrc_64>;
+def : Pat <
+  (fneg f32:$src),
+  (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000))
+>;
+
+def : Pat <
+  (fabs f64:$src),
+  (f64 (INSERT_SUBREG
+    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+      (i32 (EXTRACT_SUBREG f64:$src, sub0)), sub0),
+    (V_AND_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
+                   (V_MOV_B32_e32 0x7fffffff)), sub1)) // Set sign bit.
+>;
+def : Pat <
+  (fneg f64:$src),
+  (f64 (INSERT_SUBREG
+    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+      (i32 (EXTRACT_SUBREG f64:$src, sub0)), sub0),
+    (V_XOR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
+                   (V_MOV_B32_e32 0x80000000)), sub1))
+>;
 
 /********** ================== **********/
 /********** Immediate Patterns **********/
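Note (not part of the commit): the new SIInstructions.td patterns all reduce to IEEE-754 sign-bit arithmetic. fabs clears the sign bit (AND 0x7fffffff), fneg flips it (XOR 0x80000000), and fneg(fabs x) sets it (OR 0x80000000); for f64 only the upper 32-bit half of the register pair is touched, which is why the f64 patterns rebuild just sub1 with INSERT_SUBREG. The C++ sketch below is a host-side illustration of that arithmetic only; the BitsToFloat/FloatToBits helpers are local to the sketch, not LLVM APIs.

```cpp
// Host-side sketch of the sign-bit operations the patterns select:
// fabs = clear sign bit, fneg = flip sign bit, fneg(fabs) = set sign bit.
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint32_t FloatToBits(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));  // bit-exact reinterpretation
  return Bits;
}

static float BitsToFloat(uint32_t Bits) {
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

int main() {
  float X = -2.5f;
  float Abs    = BitsToFloat(FloatToBits(X) & 0x7fffffffu);  // fabs: V_AND_B32
  float Neg    = BitsToFloat(FloatToBits(X) ^ 0x80000000u);  // fneg: V_XOR_B32
  float NegAbs = BitsToFloat(FloatToBits(X) | 0x80000000u);  // fneg(fabs): V_OR_B32

  // For f64 the same mask is applied to bit 63, i.e. only the high dword
  // (sub1) of the 64-bit register pair needs to change.
  double D = 2.5;
  uint64_t DBits;
  std::memcpy(&DBits, &D, sizeof(DBits));
  DBits ^= (uint64_t)0x80000000u << 32;  // fneg f64: flip bit 63
  double NegD;
  std::memcpy(&NegD, &DBits, sizeof(NegD));

  std::printf("%f %f %f %f\n", Abs, Neg, NegAbs, NegD);  // 2.5 2.5 -2.5 -2.5
  return 0;
}
```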

