diff options
| author | Craig Topper <craig.topper@intel.com> | 2017-09-28 20:10:34 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-09-28 20:10:34 +0000 |
| commit | ed19350293f14bce74f53d9ffbe72ba4fc5d7bd2 (patch) | |
| tree | d1aeb99a2f3163d5e55c6e84dd3433ad77c1503f /llvm/lib | |
| parent | de22fe5b5b7dbc825494a5d7c50fb09adcc6524f (diff) | |
| download | bcm5719-llvm-ed19350293f14bce74f53d9ffbe72ba4fc5d7bd2.tar.gz bcm5719-llvm-ed19350293f14bce74f53d9ffbe72ba4fc5d7bd2.zip | |
[X86] Make use of vpmovwb when possible in LowerMULH
If we have BWI, we can truncate in a much simpler way by using vpmovwb. This even works without VLX by using the wider zmm->ymm truncate with a subvector extract.
Differential Revision: https://reviews.llvm.org/D38375
llvm-svn: 314457
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 23 |
1 file changed, 8 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d762c985e90..980db1012be 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -21631,17 +21631,7 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v32i16, ExA, ExB); Mul = DAG.getNode(ISD::SRL, dl, MVT::v32i16, Mul, DAG.getConstant(8, dl, MVT::v32i16)); - // The ymm variant of PACKUS treats the 128-bit lanes separately, so - // before using PACKUS we need to permute the inputs to the correct - // lo/hi xmm lane. - const int Mask[] = { 0, 1, 2, 3, 4, 5, 6, 7, - 16, 17, 18, 19, 20, 21, 22, 23, - 8, 9, 10, 11, 12, 13, 14, 15, - 24, 25, 26, 27, 28, 29, 30, 31}; - Mul = DAG.getVectorShuffle(MVT::v32i16, dl, Mul, Mul, Mask); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i16, Mul, Lo); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i16, Mul, Hi); - return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul); } SDValue ALo = extract128BitVector(A, 0, DAG, dl); SDValue BLo = extract128BitVector(B, 0, DAG, dl); @@ -21671,10 +21661,13 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, SDValue ExA = DAG.getNode(ExAVX, dl, MVT::v16i16, A); SDValue ExB = DAG.getNode(ExAVX, dl, MVT::v16i16, B); SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v16i16, ExA, ExB); - SDValue MulH = DAG.getNode(ISD::SRL, dl, MVT::v16i16, Mul, - DAG.getConstant(8, dl, MVT::v16i16)); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, MulH, Lo); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, MulH, Hi); + Mul = DAG.getNode(ISD::SRL, dl, MVT::v16i16, Mul, + DAG.getConstant(8, dl, MVT::v16i16)); + // If we have BWI we can use truncate instruction. 
+ if (Subtarget.hasBWI()) + return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, Mul, Lo); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, Mul, Hi); return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi); } |

