[Power9] Implement new VSX instructions: compare and conversion

This change implements the following VSX instructions:

- Quad/Double-Precision Compare:
  xscmpoqp xscmpuqp xscmpexpdp xscmpexpqp xscmpeqdp xscmpgedp xscmpgtdp
  xscmpnedp xvcmpnedp(.) xvcmpnesp(.)

- Quad-Precision Floating-Point Conversion:
  xscvqpdp(o) xscvdpqp xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz xscvsdqp
  xscvudqp xscvdphp xscvhpdp xvcvhpsp xvcvsphp xsrqpi xsrqpix xsrqpxp

28 instructions in total.

Phabricator: http://reviews.llvm.org/D16709
llvm-svn: 262068
author: Kit Barton <kbarton@ca.ibm.com> 2016-02-26 21:11:55 +0000
committer: Kit Barton <kbarton@ca.ibm.com> 2016-02-26 21:11:55 +0000
commit: 93612ec5f2f7e68c68b0619792d60631bb02e968 (patch)
tree: 909cfa2032bad80d88e77e9dabee99b533e47918 /llvm/lib
parent: e50f74474378e2179e05c770516292ed152ed8db (diff)
download: bcm5719-llvm-93612ec5f2f7e68c68b0619792d60631bb02e968.tar.gz
bcm5719-llvm-93612ec5f2f7e68c68b0619792d60631bb02e968.zip
6 files changed, 257 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index b03be12cfd9..9e22701d01c 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -124,6 +124,12 @@ def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true",
 def FeatureP8Vector  : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
                                         "Enable POWER8 vector instructions",
                                         [FeatureVSX, FeatureP8Altivec]>;
+def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec", "true",
+                                        "Enable POWER9 Altivec instructions",
+                                        [FeatureP8Altivec]>;
+def FeatureP9Vector  : SubtargetFeature<"power9-vector", "HasP9Vector", "true",
+                                        "Enable POWER9 vector instructions",
+                                        [FeatureP8Vector, FeatureP9Altivec]>;
 def FeatureDirectMove :
   SubtargetFeature<"direct-move", "HasDirectMove", "true",
                    "Enable Power8 direct move instructions",
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 4e03ed27653..36392b1304c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -747,6 +747,13 @@ class XForm_htm3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
   let Inst{31}    = RC;
 }
 
+// e.g. [PO VRT XO VRB XO /] or [PO VRT XO VRB XO RO]
+class X_RD5_XO5_RS5<bits<6> opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL,
+                    string asmstr, InstrItinClass itin, list<dag> pattern>
+  : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+  let A = xo2;
+}
+
 // XX*-Form (VSX)
 class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, 
               InstrItinClass itin, list<dag> pattern>
@@ -820,6 +827,22 @@ class XX2Form_2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
   let Inst{31}    = XT{5};
 }
 
+class XX2_RD6_XO5_RS6<bits<6> opcode, bits<5> xo2, bits<9> xo, dag OOL, dag IOL,
+                      string asmstr, InstrItinClass itin, list<dag> pattern>
+  : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<6> XT;
+  bits<6> XB;
+
+  let Pattern = pattern;
+
+  let Inst{6-10}  = XT{4-0};
+  let Inst{11-15} = xo2;
+  let Inst{16-20} = XB{4-0};
+  let Inst{21-29} = xo;
+  let Inst{30}    = XB{5};
+  let Inst{31}    = XT{5};
+}
+
 class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, 
               InstrItinClass itin, list<dag> pattern>
   : I<opcode, OOL, IOL, asmstr, itin> {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index df1142cb42f..a16d9e1696a 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1783,3 +1783,138 @@ def : Pat<(i64 (bitconvert f64:$S)),
 def : Pat<(f64 (bitconvert i64:$S)),
           (f64 (MTVSRD $S))>;
 }
+
+// The following VSX instructions were introduced in Power ISA 3.0
+def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">;
+let Predicates = [HasP9Vector] in {
+
+  // [PO VRT XO VRB XO /]
+  class X_VT5_XO5_VB5<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+                      list<dag> pattern>
+    : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vrrc:$vB),
+                    !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+
+  // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
+  class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+                         list<dag> pattern>
+    : X_VT5_XO5_VB5<opcode, xo2, xo, opc, pattern>, isDOT;
+
+  // [PO VRT XO VRB XO /], but only the left 64 bits (or less) of VRB are used,
+  // so we use a different operand class for VRB.
+  class X_VT5_XO5_VB5_TyVB<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+                           RegisterOperand vbtype, list<dag> pattern>
+    : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vbtype:$vB),
+                    !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+
+  // [PO T XO B XO BX TX]
+  class XX2_XT6_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
+                        RegisterOperand vtype, list<dag> pattern>
+    : XX2_RD6_XO5_RS6<opcode, xo2, xo, (outs vtype:$XT), (ins vtype:$XB),
+                      !strconcat(opc, " $XT, $XB"), IIC_VecFP, pattern>;
+
+  // [PO T A B XO AX BX TX], src and dest register use different operand class
+  class XX3_XT5_XA5_XB5<bits<6> opcode, bits<8> xo, string opc,
+                  RegisterOperand xty, RegisterOperand aty, RegisterOperand bty,
+                  InstrItinClass itin, list<dag> pattern>
+    : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
+              !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
+
+  //===--------------------------------------------------------------------===//
+  // Quad/Double-Precision Compare Instructions:
+
+  // [PO BF // VRA VRB XO /]
+  class X_BF3_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
+                      list<dag> pattern>
+    : XForm_17<opcode, xo, (outs crrc:$crD), (ins vrrc:$VA, vrrc:$VB),
+               !strconcat(opc, " $crD, $VA, $VB"), IIC_FPCompare> {
+    let Pattern = pattern;
+  }
+
+  // QP Compare Ordered/Unordered
+  def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>;
+  def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>;
+
+  // DP/QP Compare Exponents
+  def XSCMPEXPDP : XX3Form_1<60, 59,
+                             (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+                             "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
+  def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
+
+  // DP Compare ==, >=, >, !=
+  // Use vsrc for XT, because the entire register of XT is set.
+  // XT.dword[1] = 0x0000_0000_0000_0000
+  def XSCMPEQDP : XX3_XT5_XA5_XB5<60,  3, "xscmpeqdp", vsrc, vsfrc, vsfrc,
+                                  IIC_FPCompare, []>;
+  def XSCMPGEDP : XX3_XT5_XA5_XB5<60, 19, "xscmpgedp", vsrc, vsfrc, vsfrc,
+                                  IIC_FPCompare, []>;
+  def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc,
+                                  IIC_FPCompare, []>;
+  def XSCMPNEDP : XX3_XT5_XA5_XB5<60, 27, "xscmpnedp", vsrc, vsfrc, vsfrc,
+                                  IIC_FPCompare, []>;
+  // Vector Compare Not Equal
+  def XVCMPNEDP  : XX3Form_Rc<60, 123,
+                              (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                              "xvcmpnedp  $XT, $XA, $XB", IIC_VecFPCompare, []>;
+  let Defs = [CR6] in
+  def XVCMPNEDPo : XX3Form_Rc<60, 123,
+                              (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                              "xvcmpnedp. $XT, $XA, $XB", IIC_VecFPCompare, []>,
+                              isDOT;
+  def XVCMPNESP  : XX3Form_Rc<60,  91,
+                              (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                              "xvcmpnesp  $XT, $XA, $XB", IIC_VecFPCompare, []>;
+  let Defs = [CR6] in
+  def XVCMPNESPo : XX3Form_Rc<60,  91,
+                              (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                              "xvcmpnesp. $XT, $XA, $XB", IIC_VecFPCompare, []>,
+                              isDOT;
+
+  //===--------------------------------------------------------------------===//
+  // Quad-Precision Floating-Point Conversion Instructions:
+
+  // Convert DP -> QP
+  def XSCVDPQP  : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vsfrc, []>;
+
+  // Round & Convert QP -> DP (dword[1] is set to zero)
+  def XSCVQPDP  : X_VT5_XO5_VB5   <63, 20, 836, "xscvqpdp" , []>;
+  def XSCVQPDPO : X_VT5_XO5_VB5_Ro<63, 20, 836, "xscvqpdpo", []>;
+
+  // Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero)
+  def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>;
+  def XSCVQPSWZ : X_VT5_XO5_VB5<63,  9, 836, "xscvqpswz", []>;
+  def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>;
+  def XSCVQPUWZ : X_VT5_XO5_VB5<63,  1, 836, "xscvqpuwz", []>;
+
+  // Convert (Un)Signed DWord -> QP
+  def XSCVSDQP  : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vsfrc, []>;
+  def XSCVUDQP  : X_VT5_XO5_VB5_TyVB<63,  2, 836, "xscvudqp", vsfrc, []>;
+
+  //===--------------------------------------------------------------------===//
+  // Round to Floating-Point Integer Instructions
+
+  // (Round &) Convert DP <-> HP
+  // Note! xscvdphp's src and dest registers both use the left 64 bits, so we
+  // use vsfrc for both src and dest. xscvhpdp's src only uses the left 16
+  // bits, but we still use vsfrc for it.
+  def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>;
+  def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>;
+
+  // Vector HP <-> SP
+  def XVCVHPSP : XX2_XT6_XO5_XB6<60, 24, 475, "xvcvhpsp", vsrc, []>;
+  def XVCVSPHP : XX2_XT6_XO5_XB6<60, 25, 475, "xvcvsphp", vsrc, []>;
+
+  class Z23_VT5_R1_VB5_RMC2_EX1<bits<6> opcode, bits<8> xo, bit ex, string opc,
+                                list<dag> pattern>
+    : Z23Form_1<opcode, xo,
+                (outs vrrc:$vT), (ins u1imm:$r, vrrc:$vB, u2imm:$rmc),
+                !strconcat(opc, " $r, $vT, $vB, $rmc"), IIC_VecFP, pattern> {
+    let RC = ex;
+  }
+
+  // Round to Quad-Precision Integer [with Inexact]
+  def XSRQPI   : Z23_VT5_R1_VB5_RMC2_EX1<63,  5, 0, "xsrqpi" , []>;
+  def XSRQPIX  : Z23_VT5_R1_VB5_RMC2_EX1<63,  5, 1, "xsrqpix", []>;
+
+  // Round Quad-Precision to Double-Extended Precision (fp80)
+  def XSRQPXP  : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>;
+} // end HasP9Vector
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index c0fcb6cbb9d..c357c7564d7 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -70,6 +70,8 @@ void PPCSubtarget::initializeEnvironment() {
   HasP8Vector = false;
   HasP8Altivec = false;
   HasP8Crypto = false;
+  HasP9Vector = false;
+  HasP9Altivec = false;
   HasFCPSGN = false;
   HasFSQRT = false;
   HasFRE = false;
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 1c6f7a7a060..7ff77ce2522 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -92,6 +92,8 @@ protected:
   bool HasP8Vector;
   bool HasP8Altivec;
   bool HasP8Crypto;
+  bool HasP9Vector;
+  bool HasP9Altivec;
   bool HasFCPSGN;
   bool HasFSQRT;
   bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -230,6 +232,8 @@ public:
   bool hasP8Vector() const { return HasP8Vector; }
   bool hasP8Altivec() const { return HasP8Altivec; }
   bool hasP8Crypto() const { return HasP8Crypto; }
+  bool hasP9Vector() const { return HasP9Vector; }
+  bool hasP9Altivec() const { return HasP9Altivec; }
   bool hasMFOCRF() const { return HasMFOCRF; }
   bool hasISEL() const { return HasISEL; }
   bool hasPOPCNTD() const { return HasPOPCNTD; }
diff --git a/llvm/lib/Target/PowerPC/README_P9.txt b/llvm/lib/Target/PowerPC/README_P9.txt
new file mode 100644
index 00000000000..3138dc33b99
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/README_P9.txt
@@ -0,0 +1,87 @@
+//===- README_P9.txt - Notes for improving Power9 code gen ----------------===//
+
+TODO: Instructions that need intrinsics implemented or a mapping to LLVM IR
+
+Altivec:
+
+VSX:
+
+- QP Compare Ordered/Unordered: xscmpoqp xscmpuqp
+  . ref: XSCMPUDP
+      def XSCMPUDP : XX3Form_1<60, 35,
+                               (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+                               "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>;
+
+  . No SDAG, intrinsic, builtin are required??
+    Or llvm fcmp order/unorder compare??
+
+- DP/QP Compare Exponents: xscmpexpdp xscmpexpqp
+  . No SDAG, intrinsic, builtin are required?
+
+- DP Compare ==, >=, >, !=: xscmpeqdp xscmpgedp xscmpgtdp xscmpnedp
+  . I checked the existing instruction "XSCMPUDP". They differ in the target
+    register: "XSCMPUDP" writes to a CR field, xscmp*dp write to a VSX register
+
+  . Use intrinsic:
+    (set i128:$XT, (int_ppc_vsx_xscmpeqdp f64:$XA, f64:$XB))
+    (set i128:$XT, (int_ppc_vsx_xscmpgedp f64:$XA, f64:$XB))
+    (set i128:$XT, (int_ppc_vsx_xscmpgtdp f64:$XA, f64:$XB))
+    (set i128:$XT, (int_ppc_vsx_xscmpnedp f64:$XA, f64:$XB))
+
+- Vector Compare Not Equal: xvcmpnedp xvcmpnedp. xvcmpnesp xvcmpnesp.
+  . Similar to xvcmpeqdp:
+      defm XVCMPEQDP : XX3Form_Rcr<60, 99,
+                                 "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare,
+                                 int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>;
+
+  . So we should use "XX3Form_Rcr" to implement the intrinsic
+
+- Convert DP -> QP: xscvdpqp
+  . Similar to XSCVDPSP:
+      def XSCVDPSP : XX2Form<60, 265,
+                          (outs vsfrc:$XT), (ins vsfrc:$XB),
+                          "xscvdpsp $XT, $XB", IIC_VecFP, []>;
+  . So, No SDAG, intrinsic, builtin are required??
+
+- Round & Convert QP -> DP (dword[1] is set to zero): xscvqpdp xscvqpdpo
+  . Similar to XSCVDPSP
+  . No SDAG, intrinsic, builtin are required??
+
+- Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero):
+  xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz
+  . According to PowerISA_V3.0, these are similar to "XSCVDPSXDS", "XSCVDPSXWS",
+    "XSCVDPUXDS", "XSCVDPUXWS"
+
+  . DAG patterns:
+    (set f128:$XT, (PPCfctidz f128:$XB))    // xscvqpsdz
+    (set f128:$XT, (PPCfctiwz f128:$XB))    // xscvqpswz
+    (set f128:$XT, (PPCfctiduz f128:$XB))   // xscvqpudz
+    (set f128:$XT, (PPCfctiwuz f128:$XB))   // xscvqpuwz
+
+- Convert (Un)Signed DWord -> QP: xscvsdqp xscvudqp
+  . Similar to XSCVSXDSP
+  . (set f128:$XT, (PPCfcfids f64:$XB))     // xscvsdqp
+    (set f128:$XT, (PPCfcfidus f64:$XB))    // xscvudqp
+
+- (Round &) Convert DP <-> HP: xscvdphp xscvhpdp
+  . Similar to XSCVDPSP
+  . No SDAG, intrinsic, builtin are required??
+
+- Vector HP <-> SP: xvcvhpsp xvcvsphp
+  . Similar to XVCVDPSP:
+      def XVCVDPSP : XX2Form<60, 393,
+                          (outs vsrc:$XT), (ins vsrc:$XB),
+                          "xvcvdpsp $XT, $XB", IIC_VecFP, []>;
+  . No SDAG, intrinsic, builtin are required??
+
+- Round to Quad-Precision Integer: xsrqpi xsrqpix
+  . These are a combination of "XSRDPI", "XSRDPIC", "XSRDPIM", .., because you
+    need to specify the rounding mode in the instruction
+  . Provide builtin?
+    (set f128:$vT, (int_ppc_vsx_xsrqpi f128:$vB))
+    (set f128:$vT, (int_ppc_vsx_xsrqpix f128:$vB))
+
+- Round Quad-Precision to Double-Extended Precision (fp80): xsrqpxp
+  . Provide builtin?
+    (set f128:$vT, (int_ppc_vsx_xsrqpxp f128:$vB))
+
author	Kit Barton <kbarton@ca.ibm.com>	2016-02-26 21:11:55 +0000
committer	Kit Barton <kbarton@ca.ibm.com>	2016-02-26 21:11:55 +0000
commit	93612ec5f2f7e68c68b0619792d60631bb02e968 (patch)
tree	909cfa2032bad80d88e77e9dabee99b533e47918 /llvm/lib
parent	e50f74474378e2179e05c770516292ed152ed8db (diff)
download	bcm5719-llvm-93612ec5f2f7e68c68b0619792d60631bb02e968.tar.gz bcm5719-llvm-93612ec5f2f7e68c68b0619792d60631bb02e968.zip