[Power9] Implement new vsx instructions: quad-precision move, fp-arithmetic

This change implements the following vsx instructions: - quad-precision move xscpsgnqp, xsabsqp, xsnegqp, xsnabsqp - quad-precision fp-arithmetic xsaddqp(o) xsdivqp(o) xsmulqp(o) xssqrtqp(o) xssubqp(o) xsmaddqp(o) xsmsubqp(o) xsnmaddqp(o) xsnmsubqp(o) 22 instructions Thanks Nemanja and Kit for careful review and invaluable discussion! Reviewers: hal, nemanja, kbarton, tjablin, amehsan http://reviews.llvm.org/D16110 llvm-svn: 264565
author: Chuang-Yu Cheng <cycheng@multicorewareinc.com> 2016-03-28 07:38:01 +0000
committer: Chuang-Yu Cheng <cycheng@multicorewareinc.com> 2016-03-28 07:38:01 +0000
commit: 5663848996b01faeb58276be25d5e364378fc1c9 (patch)
tree: 2bc81f0ed8d738d001e8a1748b222f0ece64a835 /llvm/lib/Target
parent: ed638864d3e19559ca0d88523356a1f5bd141b99 (diff)
download: bcm5719-llvm-5663848996b01faeb58276be25d5e364378fc1c9.tar.gz
bcm5719-llvm-5663848996b01faeb58276be25d5e364378fc1c9.zip
2 files changed, 171 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index e17bfd7cdaa..8b035e1ace1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1819,6 +1819,55 @@ let Predicates = [HasP9Vector] in {
     : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
               !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
 
+  // [PO VRT VRA VRB XO /]
+  class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
+                      list<dag> pattern>
+    : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vA, vrrc:$vB),
+              !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>;
+
+  // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
+  class X_VT5_VA5_VB5_Ro<bits<6> opcode, bits<10> xo, string opc,
+                         list<dag> pattern>
+    : X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isDOT;
+
+  //===--------------------------------------------------------------------===//
+  // Quad-Precision Scalar Move Instructions:
+
+  // Copy Sign
+  def XSCPSGNQP : X_VT5_VA5_VB5<63, 100, "xscpsgnqp", []>;
+
+  // Absolute/Negative-Absolute/Negate
+  def XSABSQP   : X_VT5_XO5_VB5<63,  0, 804, "xsabsqp" , []>;
+  def XSNABSQP  : X_VT5_XO5_VB5<63,  8, 804, "xsnabsqp", []>;
+  def XSNEGQP   : X_VT5_XO5_VB5<63, 16, 804, "xsnegqp" , []>;
+
+  //===--------------------------------------------------------------------===//
+  // Quad-Precision Scalar Floating-Point Arithmetic Instructions:
+
+  // Add/Divide/Multiply/Subtract
+  def XSADDQP   : X_VT5_VA5_VB5   <63,   4, "xsaddqp" , []>;
+  def XSADDQPO  : X_VT5_VA5_VB5_Ro<63,   4, "xsaddqpo", []>;
+  def XSDIVQP   : X_VT5_VA5_VB5   <63, 548, "xsdivqp" , []>;
+  def XSDIVQPO  : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo", []>;
+  def XSMULQP   : X_VT5_VA5_VB5   <63,  36, "xsmulqp" , []>;
+  def XSMULQPO  : X_VT5_VA5_VB5_Ro<63,  36, "xsmulqpo", []>;
+  def XSSUBQP   : X_VT5_VA5_VB5   <63, 516, "xssubqp" , []>;
+  def XSSUBQPO  : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo", []>;
+
+  // Square-Root
+  def XSSQRTQP  : X_VT5_XO5_VB5   <63, 27, 804, "xssqrtqp" , []>;
+  def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo", []>;
+
+  // (Negative) Multiply-{Add/Subtract}
+  def XSMADDQP  : X_VT5_VA5_VB5   <63, 388, "xsmaddqp"  , []>;
+  def XSMADDQPO : X_VT5_VA5_VB5_Ro<63, 388, "xsmaddqpo" , []>;
+  def XSMSUBQP  : X_VT5_VA5_VB5   <63, 420, "xsmsubqp"  , []>;
+  def XSMSUBQPO : X_VT5_VA5_VB5_Ro<63, 420, "xsmsubqpo" , []>;
+  def XSNMADDQP : X_VT5_VA5_VB5   <63, 452, "xsnmaddqp" , []>;
+  def XSNMADDQPO: X_VT5_VA5_VB5_Ro<63, 452, "xsnmaddqpo", []>;
+  def XSNMSUBQP : X_VT5_VA5_VB5   <63, 484, "xsnmsubqp" , []>;
+  def XSNMSUBQPO: X_VT5_VA5_VB5_Ro<63, 484, "xsnmsubqpo", []>;
+
   //===--------------------------------------------------------------------===//
   // Quad/Double-Precision Compare Instructions:
 
diff --git a/llvm/lib/Target/PowerPC/README_P9.txt b/llvm/lib/Target/PowerPC/README_P9.txt
index 5482d4a3fc3..a58f52249b0 100644
--- a/llvm/lib/Target/PowerPC/README_P9.txt
+++ b/llvm/lib/Target/PowerPC/README_P9.txt
@@ -141,6 +141,128 @@ Altivec:
     VX1_Int_Ty< 65, "vmul10ecuq", int_ppc_altivec_vmul10ecuq, v1i128>;
 
 VSX:
+- QP Copy Sign: xscpsgnqp
+  . Similar to xscpsgndp
+  . (set f128:$vT, (fcopysign f128:$vB, f128:$vA)
+
+- QP Absolute/Negative-Absolute/Negate: xsabsqp xsnabsqp xsnegqp
+  . Similar to xsabsdp/xsnabsdp/xsnegdp
+  . (set f128:$vT, (fabs f128:$vB))             // xsabsqp
+    (set f128:$vT, (fneg (fabs f128:$vB)))      // xsnabsqp
+    (set f128:$vT, (fneg f128:$vB))             // xsnegqp
+
+- QP Add/Divide/Multiply/Subtract/Square-Root:
+  xsaddqp xsdivqp xsmulqp xssubqp xssqrtqp
+  . Similar to xsadddp
+  . isCommutable = 1
+    (set f128:$vT, (fadd f128:$vA, f128:$vB))   // xsaddqp
+    (set f128:$vT, (fmul f128:$vA, f128:$vB))   // xsmulqp
+
+  . isCommutable = 0
+    (set f128:$vT, (fdiv f128:$vA, f128:$vB))   // xsdivqp
+    (set f128:$vT, (fsub f128:$vA, f128:$vB))   // xssubqp
+    (set f128:$vT, (fsqrt f128:$vB)))           // xssqrtqp
+
+- Round to Odd of QP Add/Divide/Multiply/Subtract/Square-Root:
+  xsaddqpo xsdivqpo xsmulqpo xssubqpo xssqrtqpo
+  . Similar to xsrsqrtedp??
+      def XSRSQRTEDP : XX2Form<60, 74,
+                               (outs vsfrc:$XT), (ins vsfrc:$XB),
+                               "xsrsqrtedp $XT, $XB", IIC_VecFP,
+                               [(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
+
+  . Define DAG Node in PPCInstrInfo.td:
+    def PPCfaddrto: SDNode<"PPCISD::FADDRTO", SDTFPBinOp, []>;
+    def PPCfdivrto: SDNode<"PPCISD::FDIVRTO", SDTFPBinOp, []>;
+    def PPCfmulrto: SDNode<"PPCISD::FMULRTO", SDTFPBinOp, []>;
+    def PPCfsubrto: SDNode<"PPCISD::FSUBRTO", SDTFPBinOp, []>;
+    def PPCfsqrtrto: SDNode<"PPCISD::FSQRTRTO", SDTFPUnaryOp, []>;
+
+    DAG patterns of each instruction (PPCInstrVSX.td):
+    . isCommutable = 1
+      (set f128:$vT, (PPCfaddrto f128:$vA, f128:$vB))   // xsaddqpo
+      (set f128:$vT, (PPCfmulrto f128:$vA, f128:$vB))   // xsmulqpo
+
+    . isCommutable = 0
+      (set f128:$vT, (PPCfdivrto f128:$vA, f128:$vB))   // xsdivqpo
+      (set f128:$vT, (PPCfsubrto f128:$vA, f128:$vB))   // xssubqpo
+      (set f128:$vT, (PPCfsqrtrto f128:$vB))            // xssqrtqpo
+
+- QP (Negative) Multiply-{Add/Subtract}: xsmaddqp xsmsubqp xsnmaddqp xsnmsubqp
+  . Ref: xsmaddadp/xsmsubadp/xsnmaddadp/xsnmsubadp
+
+  . isCommutable = 1
+    // xsmaddqp
+    [(set f128:$vT, (fma f128:$vA, f128:$vB, f128:$vTi))]>,
+    RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+    AltVSXFMARel;
+
+    // xsmsubqp
+    [(set f128:$vT, (fma f128:$vA, f128:$vB, (fneg f128:$vTi)))]>,
+    RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+    AltVSXFMARel;
+
+    // xsnmaddqp
+    [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, f128:$vTi)))]>,
+    RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+    AltVSXFMARel;
+
+    // xsnmsubqp
+    [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, (fneg f128:$vTi))))]>,
+    RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+    AltVSXFMARel;
+
+- Round to Odd of QP (Negative) Multiply-{Add/Subtract}:
+  xsmaddqpo xsmsubqpo xsnmaddqpo xsnmsubqpo
+  . Similar to xsrsqrtedp??
+
+  . Define DAG Node in PPCInstrInfo.td:
+    def PPCfmarto: SDNode<"PPCISD::FMARTO", SDTFPTernaryOp, []>;
+
+    It looks like we only need to define "PPCfmarto" for these instructions,
+    because according to PowerISA_V3.0, these instructions perform RTO on
+    fma's result:
+        xsmaddqp(o)
+        v      ← bfp_MULTIPLY_ADD(src1, src3, src2)
+        rnd    ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)
+        result ← bfp_CONVERT_TO_BFP128(rnd)
+
+        xsmsubqp(o)
+        v      ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2))
+        rnd    ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)
+        result ← bfp_CONVERT_TO_BFP128(rnd)
+
+        xsnmaddqp(o)
+        v      ← bfp_MULTIPLY_ADD(src1,src3,src2)
+        rnd    ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v))
+        result ← bfp_CONVERT_TO_BFP128(rnd)
+
+        xsnmsubqp(o)
+        v      ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2))
+        rnd    ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v))
+        result ← bfp_CONVERT_TO_BFP128(rnd)
+
+    DAG patterns of each instruction (PPCInstrVSX.td):
+    . isCommutable = 1
+      // xsmaddqpo
+      [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, f128:$vTi))]>,
+      RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+      AltVSXFMARel;
+
+      // xsmsubqpo
+      [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi)))]>,
+      RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+      AltVSXFMARel;
+
+      // xsnmaddqpo
+      [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, f128:$vTi)))]>,
+      RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+      AltVSXFMARel;
+
+      // xsnmsubqpo
+      [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi))))]>,
+      RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
+      AltVSXFMARel;
 
 - QP Compare Ordered/Unordered: xscmpoqp xscmpuqp
   . ref: XSCMPUDP
author	Chuang-Yu Cheng <cycheng@multicorewareinc.com>	2016-03-28 07:38:01 +0000
committer	Chuang-Yu Cheng <cycheng@multicorewareinc.com>	2016-03-28 07:38:01 +0000
commit	5663848996b01faeb58276be25d5e364378fc1c9 (patch)
tree	2bc81f0ed8d738d001e8a1748b222f0ece64a835 /llvm/lib/Target
parent	ed638864d3e19559ca0d88523356a1f5bd141b99 (diff)
download	bcm5719-llvm-5663848996b01faeb58276be25d5e364378fc1c9.tar.gz bcm5719-llvm-5663848996b01faeb58276be25d5e364378fc1c9.zip