diff options
author | Kit Barton <kbarton@ca.ibm.com> | 2016-02-26 21:11:55 +0000 |
---|---|---|
committer | Kit Barton <kbarton@ca.ibm.com> | 2016-02-26 21:11:55 +0000 |
commit | 93612ec5f2f7e68c68b0619792d60631bb02e968 (patch) | |
tree | 909cfa2032bad80d88e77e9dabee99b533e47918 /llvm/lib | |
parent | e50f74474378e2179e05c770516292ed152ed8db (diff) | |
download | bcm5719-llvm-93612ec5f2f7e68c68b0619792d60631bb02e968.tar.gz bcm5719-llvm-93612ec5f2f7e68c68b0619792d60631bb02e968.zip |
[Power9] Implement new vsx instructions: compare and conversion
This change implements the following vsx instructions:
Quad/Double-Precision Compare:
xscmpoqp xscmpuqp
xscmpexpdp xscmpexpqp
xscmpeqdp xscmpgedp xscmpgtdp xscmpnedp
xvcmpnedp(.) xvcmpnesp(.)
Quad-Precision Floating-Point Conversion
xscvqpdp(o) xscvdpqp
xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz xscvsdqp xscvudqp
xscvdphp xscvhpdp xvcvhpsp xvcvsphp
xsrqpi xsrqpix xsrqpxp
28 instructions
Phabricator: http://reviews.llvm.org/D16709
llvm-svn: 262068
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPC.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrFormats.td | 23 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrVSX.td | 135 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCSubtarget.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/README_P9.txt | 87 |
6 files changed, 257 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index b03be12cfd9..9e22701d01c 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -124,6 +124,12 @@ def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true", def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true", "Enable POWER8 vector instructions", [FeatureVSX, FeatureP8Altivec]>; +def FeatureP9Altivec : SubtargetFeature<"power9-altivec", "HasP9Altivec", "true", + "Enable POWER9 Altivec instructions", + [FeatureP8Altivec]>; +def FeatureP9Vector : SubtargetFeature<"power9-vector", "HasP9Vector", "true", + "Enable POWER9 vector instructions", + [FeatureP8Vector, FeatureP9Altivec]>; def FeatureDirectMove : SubtargetFeature<"direct-move", "HasDirectMove", "true", "Enable Power8 direct move instructions", diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td index 4e03ed27653..36392b1304c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -747,6 +747,13 @@ class XForm_htm3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = RC; } +// e.g. 
[PO VRT XO VRB XO /] or [PO VRT XO VRB XO RO] +class X_RD5_XO5_RS5<bits<6> opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let A = xo2; +} + // XX*-Form (VSX) class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> @@ -820,6 +827,22 @@ class XX2Form_2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = XT{5}; } +class XX2_RD6_XO5_RS6<bits<6> opcode, bits<5> xo2, bits<9> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<6> XT; + bits<6> XB; + + let Pattern = pattern; + + let Inst{6-10} = XT{4-0}; + let Inst{11-15} = xo2; + let Inst{16-20} = XB{4-0}; + let Inst{21-29} = xo; + let Inst{30} = XB{5}; + let Inst{31} = XT{5}; +} + class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : I<opcode, OOL, IOL, asmstr, itin> { diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index df1142cb42f..a16d9e1696a 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -1783,3 +1783,138 @@ def : Pat<(i64 (bitconvert f64:$S)), def : Pat<(f64 (bitconvert i64:$S)), (f64 (MTVSRD $S))>; } + +// The following VSX instructions were introduced in Power ISA 3.0 +def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">; +let Predicates = [HasP9Vector] in { + + // [PO VRT XO VRB XO /] + class X_VT5_XO5_VB5<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc, + list<dag> pattern> + : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vrrc:$vB), + !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>; + + // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /] + class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc, + list<dag> 
pattern> + : X_VT5_XO5_VB5<opcode, xo2, xo, opc, pattern>, isDOT; + + // [PO VRT XO VRB XO /], but the VRB is only used the left 64 bits (or less), + // So we use different operand class for VRB + class X_VT5_XO5_VB5_TyVB<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc, + RegisterOperand vbtype, list<dag> pattern> + : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vbtype:$vB), + !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>; + + // [PO T XO B XO BX TX] + class XX2_XT6_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc, + RegisterOperand vtype, list<dag> pattern> + : XX2_RD6_XO5_RS6<opcode, xo2, xo, (outs vtype:$XT), (ins vtype:$XB), + !strconcat(opc, " $XT, $XB"), IIC_VecFP, pattern>; + + // [PO T A B XO AX BX TX], src and dest register use different operand class + class XX3_XT5_XA5_XB5<bits<6> opcode, bits<8> xo, string opc, + RegisterOperand xty, RegisterOperand aty, RegisterOperand bty, + InstrItinClass itin, list<dag> pattern> + : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB), + !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>; + + //===--------------------------------------------------------------------===// + // Quad/Double-Precision Compare Instructions: + + // [PO BF // VRA VRB XO /] + class X_BF3_VA5_VB5<bits<6> opcode, bits<10> xo, string opc, + list<dag> pattern> + : XForm_17<opcode, xo, (outs crrc:$crD), (ins vrrc:$VA, vrrc:$VB), + !strconcat(opc, " $crD, $VA, $VB"), IIC_FPCompare> { + let Pattern = pattern; + } + + // QP Compare Ordered/Unordered + def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>; + def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>; + + // DP/QP Compare Exponents + def XSCMPEXPDP : XX3Form_1<60, 59, + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), + "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>; + def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>; + + // DP Compare ==, >=, >, != + // Use vsrc for XT, because the entire register of XT is set. 
+ // XT.dword[1] = 0x0000_0000_0000_0000 + def XSCMPEQDP : XX3_XT5_XA5_XB5<60, 3, "xscmpeqdp", vsrc, vsfrc, vsfrc, + IIC_FPCompare, []>; + def XSCMPGEDP : XX3_XT5_XA5_XB5<60, 19, "xscmpgedp", vsrc, vsfrc, vsfrc, + IIC_FPCompare, []>; + def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc, + IIC_FPCompare, []>; + def XSCMPNEDP : XX3_XT5_XA5_XB5<60, 27, "xscmpnedp", vsrc, vsfrc, vsfrc, + IIC_FPCompare, []>; + // Vector Compare Not Equal + def XVCMPNEDP : XX3Form_Rc<60, 123, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpnedp $XT, $XA, $XB", IIC_VecFPCompare, []>; + let Defs = [CR6] in + def XVCMPNEDPo : XX3Form_Rc<60, 123, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpnedp. $XT, $XA, $XB", IIC_VecFPCompare, []>, + isDOT; + def XVCMPNESP : XX3Form_Rc<60, 91, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpnesp $XT, $XA, $XB", IIC_VecFPCompare, []>; + let Defs = [CR6] in + def XVCMPNESPo : XX3Form_Rc<60, 91, + (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), + "xvcmpnesp. 
$XT, $XA, $XB", IIC_VecFPCompare, []>, + isDOT; + + //===--------------------------------------------------------------------===// + // Quad-Precision Floating-Point Conversion Instructions: + + // Convert DP -> QP + def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vsfrc, []>; + + // Round & Convert QP -> DP (dword[1] is set to zero) + def XSCVQPDP : X_VT5_XO5_VB5 <63, 20, 836, "xscvqpdp" , []>; + def XSCVQPDPO : X_VT5_XO5_VB5_Ro<63, 20, 836, "xscvqpdpo", []>; + + // Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero) + def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>; + def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz", []>; + def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>; + def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>; + + // Convert (Un)Signed DWord -> QP + def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vsfrc, []>; + def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vsfrc, []>; + + //===--------------------------------------------------------------------===// + // Round to Floating-Point Integer Instructions + + // (Round &) Convert DP <-> HP + // Note! xscvdphp's src and dest register both use the left 64 bits, so we use + // vsfrc for src and dest register. xscvhpdp's src only use the left 16 bits, + // but we still use vsfrc for it. 
+ def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>; + def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>; + + // Vector HP -> SP + def XVCVHPSP : XX2_XT6_XO5_XB6<60, 24, 475, "xvcvhpsp", vsrc, []>; + def XVCVSPHP : XX2_XT6_XO5_XB6<60, 25, 475, "xvcvsphp", vsrc, []>; + + class Z23_VT5_R1_VB5_RMC2_EX1<bits<6> opcode, bits<8> xo, bit ex, string opc, + list<dag> pattern> + : Z23Form_1<opcode, xo, + (outs vrrc:$vT), (ins u1imm:$r, vrrc:$vB, u2imm:$rmc), + !strconcat(opc, " $r, $vT, $vB, $rmc"), IIC_VecFP, pattern> { + let RC = ex; + } + + // Round to Quad-Precision Integer [with Inexact] + def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>; + def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>; + + // Round Quad-Precision to Double-Extended Precision (fp80) + def XSRQPXP : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>; +} // end HasP9Vector diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index c0fcb6cbb9d..c357c7564d7 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -70,6 +70,8 @@ void PPCSubtarget::initializeEnvironment() { HasP8Vector = false; HasP8Altivec = false; HasP8Crypto = false; + HasP9Vector = false; + HasP9Altivec = false; HasFCPSGN = false; HasFSQRT = false; HasFRE = false; diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index 1c6f7a7a060..7ff77ce2522 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -92,6 +92,8 @@ protected: bool HasP8Vector; bool HasP8Altivec; bool HasP8Crypto; + bool HasP9Vector; + bool HasP9Altivec; bool HasFCPSGN; bool HasFSQRT; bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES; @@ -230,6 +232,8 @@ public: bool hasP8Vector() const { return HasP8Vector; } bool hasP8Altivec() const { return HasP8Altivec; } bool hasP8Crypto() const { return HasP8Crypto; } + bool hasP9Vector() const { 
return HasP9Vector; } + bool hasP9Altivec() const { return HasP9Altivec; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool hasPOPCNTD() const { return HasPOPCNTD; } diff --git a/llvm/lib/Target/PowerPC/README_P9.txt b/llvm/lib/Target/PowerPC/README_P9.txt new file mode 100644 index 00000000000..3138dc33b99 --- /dev/null +++ b/llvm/lib/Target/PowerPC/README_P9.txt @@ -0,0 +1,87 @@ +//===- README_P9.txt - Notes for improving Power9 code gen ----------------===// + +TODO: Instructions Need Implement Instrinstics or Map to LLVM IR + +Altivec: + +VSX: + +- QP Compare Ordered/Unordered: xscmpoqp xscmpuqp + . ref: XSCMPUDP + def XSCMPUDP : XX3Form_1<60, 35, + (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB), + "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>; + + . No SDAG, intrinsic, builtin are required?? + Or llvm fcmp order/unorder compare?? + +- DP/QP Compare Exponents: xscmpexpdp xscmpexpqp + . No SDAG, intrinsic, builtin are required? + +- DP Compare ==, >=, >, !=: xscmpeqdp xscmpgedp xscmpgtdp xscmpnedp + . I checked existing instruction "XSCMPUDP". They are different in target + register. "XSCMPUDP" write to CR field, xscmp*dp write to VSX register + + . Use instrinsic: + (set i128:$XT, (int_ppc_vsx_xscmpeqdp f64:$XA, f64:$XB)) + (set i128:$XT, (int_ppc_vsx_xscmpgedp f64:$XA, f64:$XB)) + (set i128:$XT, (int_ppc_vsx_xscmpgtdp f64:$XA, f64:$XB)) + (set i128:$XT, (int_ppc_vsx_xscmpnedp f64:$XA, f64:$XB)) + +- Vector Compare Not Equal: xvcmpnedp xvcmpnedp. xvcmpnesp xvcmpnesp. + . Similar to xvcmpeqdp: + defm XVCMPEQDP : XX3Form_Rcr<60, 99, + "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, + int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>; + + . So we should use "XX3Form_Rcr" to implement instrinsic + +- Convert DP -> QP: xscvdpqp + . Similar to XSCVDPSP: + def XSCVDPSP : XX2Form<60, 265, + (outs vsfrc:$XT), (ins vsfrc:$XB), + "xscvdpsp $XT, $XB", IIC_VecFP, []>; + . So, No SDAG, intrinsic, builtin are required?? 
+ +- Round & Convert QP -> DP (dword[1] is set to zero): xscvqpdp xscvqpdpo + . Similar to XSCVDPSP + . No SDAG, intrinsic, builtin are required?? + +- Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero): + xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz + . According to PowerISA_V3.0, these are similar to "XSCVDPSXDS", "XSCVDPSXWS", + "XSCVDPUXDS", "XSCVDPUXWS" + + . DAG patterns: + (set f128:$XT, (PPCfctidz f128:$XB)) // xscvqpsdz + (set f128:$XT, (PPCfctiwz f128:$XB)) // xscvqpswz + (set f128:$XT, (PPCfctiduz f128:$XB)) // xscvqpudz + (set f128:$XT, (PPCfctiwuz f128:$XB)) // xscvqpuwz + +- Convert (Un)Signed DWord -> QP: xscvsdqp xscvudqp + . Similar to XSCVSXDSP + . (set f128:$XT, (PPCfcfids f64:$XB)) // xscvsdqp + (set f128:$XT, (PPCfcfidus f64:$XB)) // xscvudqp + +- (Round &) Convert DP <-> HP: xscvdphp xscvhpdp + . Similar to XSCVDPSP + . No SDAG, intrinsic, builtin are required?? + +- Vector HP -> SP: xvcvhpsp xvcvsphp + . Similar to XVCVDPSP: + def XVCVDPSP : XX2Form<60, 393, + (outs vsrc:$XT), (ins vsrc:$XB), + "xvcvdpsp $XT, $XB", IIC_VecFP, []>; + . No SDAG, intrinsic, builtin are required?? + +- Round to Quad-Precision Integer: xsrqpi xsrqpix + . These are combination of "XSRDPI", "XSRDPIC", "XSRDPIM", .., because you + need to assign rounding mode in instruction + . Provide builtin? + (set f128:$vT, (int_ppc_vsx_xsrqpi f128:$vB)) + (set f128:$vT, (int_ppc_vsx_xsrqpix f128:$vB)) + +- Round Quad-Precision to Double-Extended Precision (fp80): xsrqpxp + . Provide builtin? + (set f128:$vT, (int_ppc_vsx_xsrqpxp f128:$vB)) + |