R600/SI: implement range reduction for sin/cos

These instructions can only take a limited input range, and return the constant value 1 out of range. We should do range reduction to be able to process arbitrary values. Use a FRACT instruction after normalization to achieve this. Also add a test for constant folding with the lowered code with unsafe-fp-math enabled. v2: use DAG lowering instead of intrinsic, adapt test v3: calculate constant, fold pattern into instruction definition v4: misc style fixes, add sin-fold testcase, cosmetics Patch by Grigori Goronzy llvm-svn: 213458
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2014-07-19 18:44:39 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2014-07-19 18:44:39 +0000
commit: ad14ce84b7091aa81eb75d0df0f0ce6788688fb3 (patch)
tree: a0b6f426de4d48992ea5dfefd7bdc0ce4da34392 /llvm/lib
parent: 8ca36815eec998bbc21d2749fe6029812d60725a (diff)
download: bcm5719-llvm-ad14ce84b7091aa81eb75d0df0f0ce6788688fb3.tar.gz
bcm5719-llvm-ad14ce84b7091aa81eb75d0df0f0ce6788688fb3.zip
4 files changed, 33 insertions, 12 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUInstrInfo.td b/llvm/lib/Target/R600/AMDGPUInstrInfo.td
index 934d59d1269..820f1a80d75 100644
--- a/llvm/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/R600/AMDGPUInstrInfo.td
@@ -34,6 +34,9 @@ def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
 // This argument to this node is a dword address.
 def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
 
+def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
+def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
+
 // out = a - floor(a)
 def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
 
diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp
index b3429b9748c..86997c82ebe 100644
--- a/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/llvm/lib/Target/R600/SIISelLowering.cpp
@@ -80,6 +80,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::SUBC, MVT::i32, Legal);
   setOperationAction(ISD::SUBE, MVT::i32, Legal);
 
+  setOperationAction(ISD::FSIN, MVT::f32, Custom);
+  setOperationAction(ISD::FCOS, MVT::f32, Custom);
+
   // We need to custom lower vector stores from local memory
   setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
@@ -637,6 +640,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     }
   }
 
+  case ISD::FSIN:
+  case ISD::FCOS:
+    return LowerTrig(Op, DAG);
   case ISD::SELECT: return LowerSELECT(Op, DAG);
   case ISD::FDIV: return LowerFDIV(Op, DAG);
   case ISD::STORE: return LowerSTORE(Op, DAG);
@@ -1116,6 +1122,23 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   return Chain;
 }
 
+SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  SDValue Arg = Op.getOperand(0);
+  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, SDLoc(Op), VT,
+        DAG.getNode(ISD::FMUL, SDLoc(Op), VT, Arg,
+          DAG.getConstantFP(0.5 / M_PI, VT)));
+
+  switch (Op.getOpcode()) {
+  case ISD::FCOS:
+    return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, FractPart);
+  case ISD::FSIN:
+    return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, FractPart);
+  default:
+    llvm_unreachable("Wrong trig opcode");
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Custom DAG optimizations
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/R600/SIISelLowering.h b/llvm/lib/Target/R600/SIISelLowering.h
index 9e9a0b05f53..b3343ee6694 100644
--- a/llvm/lib/Target/R600/SIISelLowering.h
+++ b/llvm/lib/Target/R600/SIISelLowering.h
@@ -32,6 +32,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
   SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
 
   bool foldImm(SDValue &Operand, int32_t &Immediate,
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td
index a4920db3378..bd5be3248e0 100644
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -1167,8 +1167,12 @@ defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32",
 defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64",
   [(set f64:$dst, (fsqrt f64:$src0))]
 >;
-defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>;
-defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>;
+defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32",
+  [(set f32:$dst, (AMDGPUsin f32:$src0))]
+>;
+defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32",
+  [(set f32:$dst, (AMDGPUcos f32:$src0))]
+>;
 defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>;
 defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>;
 defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>;
@@ -2343,16 +2347,6 @@ def : Pat<
 >;
 
 def : Pat <
-  (fcos f32:$src0),
-  (V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
->;
-
-def : Pat <
-  (fsin f32:$src0),
-  (V_SIN_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
->;
-
-def : Pat <
   (int_AMDGPU_cube v4f32:$src),
   (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
     (V_CUBETC_F32 (EXTRACT_SUBREG $src, sub0),
author	Matt Arsenault <Matthew.Arsenault@amd.com>	2014-07-19 18:44:39 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	2014-07-19 18:44:39 +0000
commit	ad14ce84b7091aa81eb75d0df0f0ce6788688fb3 (patch)
tree	a0b6f426de4d48992ea5dfefd7bdc0ce4da34392 /llvm/lib
parent	8ca36815eec998bbc21d2749fe6029812d60725a (diff)
download	bcm5719-llvm-ad14ce84b7091aa81eb75d0df0f0ce6788688fb3.tar.gz bcm5719-llvm-ad14ce84b7091aa81eb75d0df0f0ce6788688fb3.zip