diff options
author | Chuang-Yu Cheng <cycheng@multicorewareinc.com> | 2016-03-26 05:46:11 +0000 |
---|---|---|
committer | Chuang-Yu Cheng <cycheng@multicorewareinc.com> | 2016-03-26 05:46:11 +0000 |
commit | 065969ec8e492eb8f9724492bda55d3ec9b7e68d (patch) | |
tree | 0865e77923eacbad0f8633a5aaf8bd618ec13f9b /llvm/lib | |
parent | 01e321306b9f505afa2c15428bfcb2143a70a95f (diff) | |
download | bcm5719-llvm-065969ec8e492eb8f9724492bda55d3ec9b7e68d.tar.gz bcm5719-llvm-065969ec8e492eb8f9724492bda55d3ec9b7e68d.zip |
[Power9] Implement new altivec instructions: permute, count zero, extend sign, negate, parity, shift/rotate, mul10
This change implements the following vector operations:
- vclzlsbb vctzlsbb vctzb vctzd vctzh vctzw
- vextsb2w vextsh2w vextsb2d vextsh2d vextsw2d
- vnegd vnegw
- vprtybd vprtybq vprtybw
- vbpermd vpermr
- vrlwnm vrlwmi vrldnm vrldmi vslv vsrv
- vmul10cuq vmul10uq vmul10ecuq vmul10euq
28 instructions
Thanks Nemanja, Kit for invaluable hints and discussion!
Reviewers: hal, nemanja, kbarton, tjablin, amehsan
Phabricator: http://reviews.llvm.org/D15887
llvm-svn: 264504
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 61 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrFormats.td | 15 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/README_P9.txt | 105 |
3 files changed, 181 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index 989946cf394..a617020c16d 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -1277,4 +1277,65 @@ def VINSERTB : VX1_VT5_UIM5_VB5<781, "vinsertb", []>; def VINSERTH : VX1_VT5_UIM5_VB5<845, "vinserth", []>; def VINSERTW : VX1_VT5_UIM5_VB5<909, "vinsertw", []>; def VINSERTD : VX1_VT5_UIM5_VB5<973, "vinsertd", []>; + +class VX_VT5_EO5_VB5<bits<11> xo, bits<5> eo, string opc, list<dag> pattern> + : VXForm_RD5_XO5_RS5<xo, eo, (outs vrrc:$vD), (ins vrrc:$vB), + !strconcat(opc, " $vD, $vB"), IIC_VecGeneral, pattern>; + +// Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD] +def VCLZLSBB : VXForm_RD5_XO5_RS5<1538, 0, (outs g8rc:$rD), (ins vrrc:$vB), + "vclzlsbb $rD, $vB", IIC_VecGeneral, []>; +def VCTZLSBB : VXForm_RD5_XO5_RS5<1538, 1, (outs g8rc:$rD), (ins vrrc:$vB), + "vctzlsbb $rD, $vB", IIC_VecGeneral, []>; +// Vector Count Trailing Zeros +def VCTZB : VX_VT5_EO5_VB5<1538, 28, "vctzb", []>; +def VCTZH : VX_VT5_EO5_VB5<1538, 29, "vctzh", []>; +def VCTZW : VX_VT5_EO5_VB5<1538, 30, "vctzw", []>; +def VCTZD : VX_VT5_EO5_VB5<1538, 31, "vctzd", []>; + +// Vector Extend Sign +def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w", []>; +def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w", []>; +def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d", []>; +def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d", []>; +def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d", []>; + +// Vector Integer Negate +def VNEGW : VX_VT5_EO5_VB5<1538, 6, "vnegw", []>; +def VNEGD : VX_VT5_EO5_VB5<1538, 7, "vnegd", []>; + +// Vector Parity Byte +def VPRTYBW : VX_VT5_EO5_VB5<1538, 8, "vprtybw", []>; +def VPRTYBD : VX_VT5_EO5_VB5<1538, 9, "vprtybd", []>; +def VPRTYBQ : VX_VT5_EO5_VB5<1538, 10, "vprtybq", []>; + +// Vector (Bit) Permute (Right-indexed) +def VBPERMD : VXForm_1<1484, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vbpermd $vD, $vA, $vB", IIC_VecFP, []>; +def VPERMR : VAForm_1a<59, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), + "vpermr $vD, $vA, $vB, $vC", IIC_VecFP, []>; + +class VX1_VT5_VA5_VB5<bits<11> xo, string opc, list<dag> pattern> + : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern>; + +// Vector Rotate Left Mask/Mask-Insert +def VRLWNM : VX1_VT5_VA5_VB5<389, "vrlwnm", []>; +def VRLWMI : VX1_VT5_VA5_VB5<133, "vrlwmi", []>; +def VRLDNM : VX1_VT5_VA5_VB5<453, "vrldnm", []>; +def VRLDMI : VX1_VT5_VA5_VB5<197, "vrldmi", []>; + +// Vector Shift Left/Right +def VSLV : VX1_VT5_VA5_VB5<1860, "vslv", []>; +def VSRV : VX1_VT5_VA5_VB5<1796, "vsrv", []>; + +// Vector Multiply-by-10 (& Write Carry) Unsigned Quadword +def VMUL10UQ : VXForm_BX<513, (outs vrrc:$vD), (ins vrrc:$vA), + "vmul10uq $vD, $vA", IIC_VecFP, []>; +def VMUL10CUQ : VXForm_BX< 1, (outs vrrc:$vD), (ins vrrc:$vA), + "vmul10cuq $vD, $vA", IIC_VecFP, []>; + +// Vector Multiply-by-10 Extended (& Write Carry) Unsigned Quadword +def VMUL10EUQ : VX1_VT5_VA5_VB5<577, "vmul10euq" , []>; +def VMUL10ECUQ : VX1_VT5_VA5_VB5< 65, "vmul10ecuq", []>; } // end HasP9Altivec
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td index 999359bf3ad..b11565ce8b5 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -1609,6 +1609,21 @@ class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr, let Inst{21-31} = xo; } +// e.g. [PO VRT EO VRB XO] +class VXForm_RD5_XO5_RS5<bits<11> xo, bits<5> eo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<4, OOL, IOL, asmstr, itin> { + bits<5> RD; + bits<5> VB; + + let Pattern = pattern; + + let Inst{6-10} = RD; + let Inst{11-15} = eo; + let Inst{16-20} = VB; + let Inst{21-31} = xo; +} + /// VXForm_CR - VX crypto instructions with "VRT, VRA, ST, SIX" class VXForm_CR<bits<11> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
diff --git a/llvm/lib/Target/PowerPC/README_P9.txt b/llvm/lib/Target/PowerPC/README_P9.txt index 31038c92948..5482d4a3fc3 100644 --- a/llvm/lib/Target/PowerPC/README_P9.txt +++ b/llvm/lib/Target/PowerPC/README_P9.txt @@ -35,6 +35,111 @@ Altivec: (set v4i32:$vD, (int_ppc_altivec_vinserth v4i32:$vA, imm:$UIMM)) (set v2i64:$vD, (int_ppc_altivec_vinsertw v2i64:$vA, imm:$UIMM)) +- Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD]: + vclzlsbb vctzlsbb + . Use intrinsic: + (set i64:$rD, (int_ppc_altivec_vclzlsbb v16i8:$vB)) + (set i64:$rD, (int_ppc_altivec_vctzlsbb v16i8:$vB)) + +- Vector Count Trailing Zeros: vctzb vctzh vctzw vctzd + . Map to llvm cttz + (set v16i8:$vD, (cttz v16i8:$vB)) // vctzb + (set v8i16:$vD, (cttz v8i16:$vB)) // vctzh + (set v4i32:$vD, (cttz v4i32:$vB)) // vctzw + (set v2i64:$vD, (cttz v2i64:$vB)) // vctzd + +- Vector Extend Sign: vextsb2w vextsh2w vextsb2d vextsh2d vextsw2d + . vextsb2w: + (set v4i32:$vD, (sext v4i8:$vB)) + + // PowerISA_V3.0: + do i = 0 to 3 + VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].byte[3]) + end + + . vextsh2w: + (set v4i32:$vD, (sext v4i16:$vB)) + + // PowerISA_V3.0: + do i = 0 to 3 + VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].hword[1]) + end + + . vextsb2d + (set v2i64:$vD, (sext v2i8:$vB)) + + // PowerISA_V3.0: + do i = 0 to 1 + VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].byte[7]) + end + + . vextsh2d + (set v2i64:$vD, (sext v2i16:$vB)) + + // PowerISA_V3.0: + do i = 0 to 1 + VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].hword[3]) + end + + . vextsw2d + (set v2i64:$vD, (sext v2i32:$vB)) + + // PowerISA_V3.0: + do i = 0 to 1 + VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].word[1]) + end + +- Vector Integer Negate: vnegw vnegd + . Map to llvm ineg + (set v4i32:$rT, (ineg v4i32:$rA)) // vnegw + (set v2i64:$rT, (ineg v2i64:$rA)) // vnegd + +- Vector Parity Byte: vprtybw vprtybd vprtybq + . Use intrinsic: + (set v4i32:$rD, (int_ppc_altivec_vprtybw v4i32:$vB)) + (set v2i64:$rD, (int_ppc_altivec_vprtybd v2i64:$vB)) + (set v1i128:$rD, (int_ppc_altivec_vprtybq v1i128:$vB)) + +- Vector (Bit) Permute (Right-indexed): + . vbpermd: Same as "vbpermq", use VX1_Int_Ty2: + VX1_Int_Ty2<1484, "vbpermd", int_ppc_altivec_vbpermd, v2i64, v2i64>; + + . vpermr: use VA1a_Int_Ty3 + VA1a_Int_Ty3<59, "vpermr", int_ppc_altivec_vpermr, v16i8, v16i8, v16i8>; + +- Vector Rotate Left Mask/Mask-Insert: vrlwnm vrlwmi vrldnm vrldmi + . Use intrinsic: + VX1_Int_Ty<389, "vrlwnm", int_ppc_altivec_vrlwnm, v4i32>; + VX1_Int_Ty<133, "vrlwmi", int_ppc_altivec_vrlwmi, v4i32>; + VX1_Int_Ty<453, "vrldnm", int_ppc_altivec_vrldnm, v2i64>; + VX1_Int_Ty<197, "vrldmi", int_ppc_altivec_vrldmi, v2i64>; + +- Vector Shift Left/Right: vslv vsrv + . Use intrinsic, don't map to llvm shl and lshr, because they have different + semantics, e.g. vslv: + + do i = 0 to 15 + sh ← VR[VRB].byte[i].bit[5:7] + VR[VRT].byte[i] ← src.byte[i:i+1].bit[sh:sh+7] + end + + VR[VRT].byte[i] is composed of 2 bytes from src.byte[i:i+1] + + . VX1_Int_Ty<1860, "vslv", int_ppc_altivec_vslv, v16i8>; + VX1_Int_Ty<1796, "vsrv", int_ppc_altivec_vsrv, v16i8>; + +- Vector Multiply-by-10 (& Write Carry) Unsigned Quadword: + vmul10uq vmul10cuq + . Use intrinsic: + VX1_Int_Ty<513, "vmul10uq", int_ppc_altivec_vmul10uq, v1i128>; + VX1_Int_Ty< 1, "vmul10cuq", int_ppc_altivec_vmul10cuq, v1i128>; + +- Vector Multiply-by-10 Extended (& Write Carry) Unsigned Quadword: + vmul10euq vmul10ecuq + . Use intrinsic: + VX1_Int_Ty<577, "vmul10euq", int_ppc_altivec_vmul10euq, v1i128>; + VX1_Int_Ty< 65, "vmul10ecuq", int_ppc_altivec_vmul10ecuq, v1i128>; VSX: - QP Compare Ordered/Unordered: xscmpoqp xscmpuqp |