diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-01-09 17:20:03 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-01-09 17:20:03 +0000 |
commit | 0f23b2ba1a4a476c5d1e2519fdfca5d92dc0e0df (patch) | |
tree | 826d6275dbaa9cbb98d10b24c9c91588e02a25be /llvm/lib/Target/X86/X86ISelLowering.cpp | |
parent | e4bb54a8556ac37009dfa815d438f1e0f6890bd2 (diff) | |
download | bcm5719-llvm-0f23b2ba1a4a476c5d1e2519fdfca5d92dc0e0df.tar.gz bcm5719-llvm-0f23b2ba1a4a476c5d1e2519fdfca5d92dc0e0df.zip |
[X86][AVX512] Enable v16i8/v32i8 vector shifts to use an extend+shift+truncate pattern.
Use the existing AVX2 v8i16 vector shift lowering for v16i8 (extending to v16i32) on AVX512 targets and v32i8 (extending to v32i16) on AVX512BW targets.
Cost model updates to follow.
llvm-svn: 291451
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 31 |
1 files changed, 17 insertions, 14 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 66f1959c035..c09a19814f3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21545,6 +21545,23 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
     return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});
   }
 
+  // It's worth extending once and using the vXi16/vXi32 shifts for smaller
+  // types, but without AVX512 the extra overheads to get from vXi8 to vXi32
+  // make the existing SSE solution better.
+  if ((Subtarget.hasInt256() && VT == MVT::v8i16) ||
+      (Subtarget.hasAVX512() && VT == MVT::v16i16) ||
+      (Subtarget.hasAVX512() && VT == MVT::v16i8) ||
+      (Subtarget.hasBWI() && VT == MVT::v32i8)) {
+    MVT EvtSVT = (VT == MVT::v32i8 ? MVT::i16 : MVT::i32);
+    MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements());
+    unsigned ExtOpc =
+        Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+    R = DAG.getNode(ExtOpc, dl, ExtVT, R);
+    Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);
+    return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                       DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
+  }
+
   if (VT == MVT::v16i8 ||
       (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP())) {
     MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
@@ -21653,20 +21670,6 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
     }
   }
 
-  // It's worth extending once and using the vXi32 shifts for 16-bit types, but
-  // the extra overheads to get from v16i8 to v8i32 make the existing SSE
-  // solution better.
-  if ((Subtarget.hasInt256() && VT == MVT::v8i16) ||
-      (Subtarget.hasAVX512() && VT == MVT::v16i16)) {
-    MVT ExtVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements());
-    unsigned ExtOpc =
-        Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
-    R = DAG.getNode(ExtOpc, dl, ExtVT, R);
-    Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);
-    return DAG.getNode(ISD::TRUNCATE, dl, VT,
-                       DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
-  }
-
   if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
     MVT ExtVT = MVT::v8i32;
     SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
```