diff options
| author | Stefan Pintilie <stefanp@ca.ibm.com> | 2017-08-02 20:07:21 +0000 |
|---|---|---|
| committer | Stefan Pintilie <stefanp@ca.ibm.com> | 2017-08-02 20:07:21 +0000 |
| commit | 873889ca1680e0cd5feafec46daccf96116e2751 (patch) | |
| tree | d09253dde78b15fa887555c3e2f3475f0934101c /llvm/lib/Target | |
| parent | 1b536724d9fb49657b56b25e8f1514b4fe4cea60 (diff) | |
| download | bcm5719-llvm-873889ca1680e0cd5feafec46daccf96116e2751.tar.gz bcm5719-llvm-873889ca1680e0cd5feafec46daccf96116e2751.zip | |
[Power9] Exploit vector absolute difference instructions on Power 9
Power 9 has vector absolute difference instructions (VABSDUB, VABSDUH, VABSDUW)
for byte, halfword, and word elements. We should take advantage of these.
Differential Revision: https://reviews.llvm.org/D34684
llvm-svn: 309876
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 38 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 15 |
2 files changed, 52 insertions, 1 deletion
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 3fa2c76673c..ff88b388fb9 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -226,6 +226,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::UREM, MVT::i64, Expand); } + if (Subtarget.hasP9Vector()) { + setOperationAction(ISD::ABS, MVT::v4i32, Legal); + setOperationAction(ISD::ABS, MVT::v8i16, Legal); + setOperationAction(ISD::ABS, MVT::v16i8, Legal); + } + // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); @@ -8390,6 +8396,8 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + SDLoc dl(Op); + if (IntrinsicID == Intrinsic::thread_pointer) { // Reads the thread pointer register, used for __builtin_thread_pointer. if (Subtarget.isPPC64()) @@ -8397,9 +8405,37 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getRegister(PPC::R2, MVT::i32); } + // We are looking for absolute values here. 
+ // The idea is to try to fit one of two patterns: + // max (a, (0-a)) OR max ((0-a), a) + if (Subtarget.hasP9Vector() && + (IntrinsicID == Intrinsic::ppc_altivec_vmaxsw || + IntrinsicID == Intrinsic::ppc_altivec_vmaxsh || + IntrinsicID == Intrinsic::ppc_altivec_vmaxsb)) { + SDValue V1 = Op.getOperand(1); + SDValue V2 = Op.getOperand(2); + if (V1.getSimpleValueType() == V2.getSimpleValueType() && + (V1.getSimpleValueType() == MVT::v4i32 || + V1.getSimpleValueType() == MVT::v8i16 || + V1.getSimpleValueType() == MVT::v16i8)) { + if ( V1.getOpcode() == ISD::SUB && + ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) && + V1.getOperand(1) == V2 ) { + // Generate the abs instruction with the operands + return DAG.getNode(ISD::ABS, dl, V2.getValueType(),V2); + } + + if ( V2.getOpcode() == ISD::SUB && + ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) && + V2.getOperand(1) == V1 ) { + // Generate the abs instruction with the operands + return DAG.getNode(ISD::ABS, dl, V1.getValueType(),V1); + } + } + } + // If this is a lowered altivec predicate compare, CompareOpc is set to the // opcode number of the comparison. 
- SDLoc dl(Op); int CompareOpc; bool isDot; if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget)) diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index 5465b5f2d66..d89881c4f7f 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -1488,4 +1488,19 @@ def VABSDUH : VXForm_1<1091, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), def VABSDUW : VXForm_1<1155, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vabsduw $vD, $vA, $vB", IIC_VecGeneral, [(set v4i32:$vD, (int_ppc_altivec_vabsduw v4i32:$vA, v4i32:$vB))]>; + +def : Pat<(v16i8:$vD (abs v16i8:$vA)), + (v16i8 (VABSDUB $vA, (V_SET0B)))>; +def : Pat<(v8i16:$vD (abs v8i16:$vA)), + (v8i16 (VABSDUH $vA, (V_SET0H)))>; +def : Pat<(v4i32:$vD (abs v4i32:$vA)), + (v4i32 (VABSDUW $vA, (V_SET0)))>; + +def : Pat<(v16i8:$vD (abs (sub v16i8:$vA, v16i8:$vB))), + (v16i8 (VABSDUB $vA, $vB))>; +def : Pat<(v8i16:$vD (abs (sub v8i16:$vA, v8i16:$vB))), + (v8i16 (VABSDUH $vA, $vB))>; +def : Pat<(v4i32:$vD (abs (sub v4i32:$vA, v4i32:$vB))), + (v4i32 (VABSDUW $vA, $vB))>; + } // end HasP9Altivec |

