summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2017-09-28 20:10:34 +0000
committerCraig Topper <craig.topper@intel.com>2017-09-28 20:10:34 +0000
commited19350293f14bce74f53d9ffbe72ba4fc5d7bd2 (patch)
treed1aeb99a2f3163d5e55c6e84dd3433ad77c1503f /llvm/lib
parentde22fe5b5b7dbc825494a5d7c50fb09adcc6524f (diff)
downloadbcm5719-llvm-ed19350293f14bce74f53d9ffbe72ba4fc5d7bd2.tar.gz
bcm5719-llvm-ed19350293f14bce74f53d9ffbe72ba4fc5d7bd2.zip
[X86] Make use of vpmovwb when possible in LowerMULH
If we have BWI, we can truncate in a much simpler way by using vpmovwb. This even works without VLX by using the wider zmm->ymm truncate with a subvector extract. Differential Revision: https://reviews.llvm.org/D38375 llvm-svn: 314457
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp23
1 files changed, 8 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d762c985e90..980db1012be 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21631,17 +21631,7 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v32i16, ExA, ExB);
Mul = DAG.getNode(ISD::SRL, dl, MVT::v32i16, Mul,
DAG.getConstant(8, dl, MVT::v32i16));
- // The ymm variant of PACKUS treats the 128-bit lanes separately, so
- // before using PACKUS we need to permute the inputs to the correct
- // lo/hi xmm lane.
- const int Mask[] = { 0, 1, 2, 3, 4, 5, 6, 7,
- 16, 17, 18, 19, 20, 21, 22, 23,
- 8, 9, 10, 11, 12, 13, 14, 15,
- 24, 25, 26, 27, 28, 29, 30, 31};
- Mul = DAG.getVectorShuffle(MVT::v32i16, dl, Mul, Mul, Mask);
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i16, Mul, Lo);
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v16i16, Mul, Hi);
- return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
}
SDValue ALo = extract128BitVector(A, 0, DAG, dl);
SDValue BLo = extract128BitVector(B, 0, DAG, dl);
@@ -21671,10 +21661,13 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
SDValue ExA = DAG.getNode(ExAVX, dl, MVT::v16i16, A);
SDValue ExB = DAG.getNode(ExAVX, dl, MVT::v16i16, B);
SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v16i16, ExA, ExB);
- SDValue MulH = DAG.getNode(ISD::SRL, dl, MVT::v16i16, Mul,
- DAG.getConstant(8, dl, MVT::v16i16));
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, MulH, Lo);
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, MulH, Hi);
+ Mul = DAG.getNode(ISD::SRL, dl, MVT::v16i16, Mul,
+ DAG.getConstant(8, dl, MVT::v16i16));
+ // If we have BWI we can use truncate instruction.
+ if (Subtarget.hasBWI())
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, Mul, Lo);
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, Mul, Hi);
return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
}
OpenPOWER on IntegriCloud