summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2017-01-11 10:36:51 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2017-01-11 10:36:51 +0000
commit5a81fefad3cebc32949a7955ab5e1ce8942e5c55 (patch)
treea389da1cb7474b74cf1cb40f236bd220899e3e7e /llvm/lib/Target/X86
parenta47515ec4adf3652903d2fdb6eb765d37633aee9 (diff)
downloadbcm5719-llvm-5a81fefad3cebc32949a7955ab5e1ce8942e5c55.tar.gz
bcm5719-llvm-5a81fefad3cebc32949a7955ab5e1ce8942e5c55.zip
[X86][AVX512BW] Vectorize v64i8 vector shifts
Differential Revision: https://reviews.llvm.org/D28447 llvm-svn: 291665
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp20
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp4
2 files changed, 20 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 84c51182739..31978ffe6ea 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21752,14 +21752,26 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
}
if (VT == MVT::v16i8 ||
- (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP())) {
+ (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) ||
+ (VT == MVT::v64i8 && Subtarget.hasBWI())) {
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
unsigned ShiftOpcode = Op->getOpcode();
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
- // On SSE41 targets we make use of the fact that VSELECT lowers
- // to PBLENDVB which selects bytes based just on the sign bit.
- if (Subtarget.hasSSE41()) {
+ if (VT.is512BitVector()) {
+ // On AVX512BW targets we make use of the fact that VSELECT lowers
+ // to a masked blend which selects bytes based just on the sign bit
+ // extracted to a mask.
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ V0 = DAG.getBitcast(VT, V0);
+ V1 = DAG.getBitcast(VT, V1);
+ Sel = DAG.getBitcast(VT, Sel);
+ Sel = DAG.getNode(X86ISD::CVT2MASK, dl, MaskVT, Sel);
+ return DAG.getBitcast(SelVT,
+ DAG.getNode(ISD::VSELECT, dl, VT, Sel, V0, V1));
+ } else if (Subtarget.hasSSE41()) {
+ // On SSE41 targets we make use of the fact that VSELECT lowers
+ // to PBLENDVB which selects bytes based just on the sign bit.
V0 = DAG.getBitcast(VT, V0);
V1 = DAG.getBitcast(VT, V1);
Sel = DAG.getBitcast(VT, Sel);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 586786d29e9..5715d826862 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -323,6 +323,10 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRL, MVT::v32i16, 1 }, // vpsrlvw
{ ISD::SRA, MVT::v32i16, 1 }, // vpsravw
+ { ISD::SHL, MVT::v64i8, 11 }, // vpblendvb sequence.
+ { ISD::SRL, MVT::v64i8, 11 }, // vpblendvb sequence.
+ { ISD::SRA, MVT::v64i8, 24 }, // vpblendvb sequence.
+
{ ISD::MUL, MVT::v64i8, 11 }, // extend/pmullw/trunc sequence.
{ ISD::MUL, MVT::v32i8, 4 }, // extend/pmullw/trunc sequence.
{ ISD::MUL, MVT::v16i8, 4 }, // extend/pmullw/trunc sequence.
OpenPOWER on IntegriCloud