summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2014-08-15 18:42:22 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2014-08-15 18:42:22 +0000
commit: fabf545299698eded064b29fe8446ba81051f284 (patch)
tree: d8c3e591ee4a925ead8d2ed466e60b52ef8f40e9 /llvm/lib
parent: 13623d0e2877d9273f26a0ba9d059224d6f4e3f1 (diff)
download: bcm5719-llvm-fabf545299698eded064b29fe8446ba81051f284.tar.gz
download: bcm5719-llvm-fabf545299698eded064b29fe8446ba81051f284.zip
R600/SI: Move all fabs / fneg handling to patterns
llvm-svn: 215749
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/R600/SIISelLowering.cpp 89
-rw-r--r-- llvm/lib/Target/R600/SIInstructions.td 59
2 files changed, 31 insertions, 117 deletions
diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp
index 1d5b43f5954..09e319a3a5d 100644
--- a/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/llvm/lib/Target/R600/SIISelLowering.cpp
@@ -644,95 +644,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MI->eraseFromParent();
break;
}
- case AMDGPU::FABS_SI: {
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
- DebugLoc DL = MI->getDebugLoc();
- unsigned DestReg = MI->getOperand(0).getReg();
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-
- BuildMI(*BB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), Reg)
- .addImm(0x7fffffff);
- BuildMI(*BB, I, DL, TII->get(AMDGPU::V_AND_B32_e32), DestReg)
- .addReg(MI->getOperand(1).getReg())
- .addReg(Reg);
- MI->eraseFromParent();
- break;
- }
- case AMDGPU::FABS64_SI: {
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
-
- DebugLoc DL = MI->getDebugLoc();
- unsigned SuperReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
-
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-
- // Copy the subregister to make sure it is the right register class.
- unsigned VReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
- BuildMI(*BB, I, DL, TII->get(AMDGPU::COPY), VReg)
- .addReg(SrcReg, 0, AMDGPU::sub1);
-
- // We only need to mask the upper half of the register pair.
- BuildMI(*BB, I, DL, TII->get(AMDGPU::V_AND_B32_e32), TmpReg)
- .addImm(0x7fffffff)
- .addReg(VReg);
-
- BuildMI(*BB, I, DL, TII->get(AMDGPU::REG_SEQUENCE), SuperReg)
- .addReg(SrcReg, 0, AMDGPU::sub0)
- .addImm(AMDGPU::sub0)
- .addReg(TmpReg)
- .addImm(AMDGPU::sub1);
- MI->eraseFromParent();
- break;
- }
- case AMDGPU::FNEG_SI: {
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
- DebugLoc DL = MI->getDebugLoc();
- unsigned DestReg = MI->getOperand(0).getReg();
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-
- // FIXME: Should use SALU instructions
- BuildMI(*BB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), Reg)
- .addImm(0x80000000);
- BuildMI(*BB, I, DL, TII->get(AMDGPU::V_XOR_B32_e32), DestReg)
- .addReg(MI->getOperand(1).getReg())
- .addReg(Reg);
- MI->eraseFromParent();
- break;
- }
- case AMDGPU::FNEG64_SI: {
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
- getTargetMachine().getSubtargetImpl()->getInstrInfo());
-
- DebugLoc DL = MI->getDebugLoc();
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned DestReg = MI->getOperand(0).getReg();
-
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
- unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
-
- // FIXME: Should use SALU instructions
- BuildMI(*BB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), ImmReg)
- .addImm(0x80000000);
- BuildMI(*BB, I, DL, TII->get(AMDGPU::V_XOR_B32_e32), TmpReg)
- .addReg(SrcReg, 0, AMDGPU::sub1)
- .addReg(ImmReg);
-
- BuildMI(*BB, I, DL, TII->get(AMDGPU::REG_SEQUENCE), DestReg)
- .addReg(SrcReg, 0, AMDGPU::sub0)
- .addImm(AMDGPU::sub0)
- .addReg(TmpReg)
- .addImm(AMDGPU::sub1);
- MI->eraseFromParent();
- break;
- }
case AMDGPU::FCLAMP_SI: {
const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
getTargetMachine().getSubtargetImpl()->getInstrInfo());
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td
index 8d2c212dc15..fd011a16067 100644
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -2322,48 +2322,51 @@ def : Pat <
/********** Floating point absolute/negative **********/
/********** ================================ **********/
-// Manipulate the sign bit directly, as e.g. using the source negation modifier
-// in V_ADD_F32_e64 $src, 0, [...] does not result in -0.0 for $src == +0.0,
-// breaking the piglit *s-floatBitsToInt-neg* tests
+// Prevent expanding both fneg and fabs.
-// TODO: Look into not implementing isFNegFree/isFAbsFree for SI, and possibly
-// removing these patterns
+// FIXME: Should use S_OR_B32
def : Pat <
(fneg (fabs f32:$src)),
(V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
>;
+// FIXME: Should use S_OR_B32
def : Pat <
(fneg (fabs f64:$src)),
(f64 (INSERT_SUBREG
(INSERT_SUBREG (f64 (IMPLICIT_DEF)),
(i32 (EXTRACT_SUBREG f64:$src, sub0)), sub0),
- (V_OR_B32_e32 (S_MOV_B32 0x80000000),
- (EXTRACT_SUBREG f64:$src, sub1)), sub1)) // Set sign bit.
->;
-
-class SIUnaryCustomInsertInst<string name, SDPatternOperator node,
- ValueType vt,
- RegisterClass dstrc,
- RegisterClass srcrc> :
- AMDGPUShaderInst<
- (outs dstrc:$dst),
- (ins srcrc:$src0),
- name#" $dst, $src0",
- [(set vt:$dst, (node vt:$src0))]> {
- let usesCustomInserter = 1;
-}
+ (V_OR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
+ (V_MOV_B32_e32 0x80000000)), sub1)) // Set sign bit.
+>;
-def FABS_SI : SIUnaryCustomInsertInst<"FABS_SI", fabs,
- f32, VReg_32, VSrc_32>;
-def FNEG_SI : SIUnaryCustomInsertInst<"FNEG_SI", fneg,
- f32, VReg_32, VSrc_32>;
+def : Pat <
+ (fabs f32:$src),
+ (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff))
+>;
-def FABS64_SI : SIUnaryCustomInsertInst<"FABS64_SI", fabs,
- f64, VReg_64, VSrc_64>;
-def FNEG64_SI : SIUnaryCustomInsertInst<"FNEG64_SI", fneg,
- f64, VReg_64, VSrc_64>;
+def : Pat <
+ (fneg f32:$src),
+ (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000))
+>;
+
+def : Pat <
+ (fabs f64:$src),
+ (f64 (INSERT_SUBREG
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ (i32 (EXTRACT_SUBREG f64:$src, sub0)), sub0),
+ (V_AND_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
+ (V_MOV_B32_e32 0x7fffffff)), sub1)) // Clear sign bit.
+>;
+def : Pat <
+ (fneg f64:$src),
+ (f64 (INSERT_SUBREG
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ (i32 (EXTRACT_SUBREG f64:$src, sub0)), sub0),
+ (V_XOR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
+ (V_MOV_B32_e32 0x80000000)), sub1))
+>;
/********** ================== **********/
/********** Immediate Patterns **********/
OpenPOWER on IntegriCloud