summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-04-28 14:31:01 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-04-28 14:31:01 +0000
commit: 22d1476bfa80a5e418ed3e60fda3a2ffcffe2e12 (patch)
tree: 934db32a2636441f265eebf016684e174f8dfda6 /llvm/lib
parent: ce8cfe96f76fdc0c8c2f7b0fe0cd25a2990f6f44 (diff)
download: bcm5719-llvm-22d1476bfa80a5e418ed3e60fda3a2ffcffe2e12.tar.gz
bcm5719-llvm-22d1476bfa80a5e418ed3e60fda3a2ffcffe2e12.zip
[X86][AVX] Combine non-lane crossing binary shuffles using X86ISD::VPERMV3
Some of the combines might be further improved if we lower more shuffles with X86ISD::VPERMV3 directly, instead of waiting to combine the results. llvm-svn: 359400
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 22
1 files changed, 22 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 535a7b27b62..32e3a80d5c7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31874,6 +31874,28 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
+ // If we have a dual input shuffle then lower to VPERMV3.
+ if (!UnaryShuffle && AllowVariableMask && !MaskContainsZeros &&
+ ((Subtarget.hasAVX512() &&
+ (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
+ MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
+ (Subtarget.hasVLX() &&
+ (MaskVT == MVT::v2f64 || MaskVT == MVT::v2i64 || MaskVT == MVT::v4f64 ||
+ MaskVT == MVT::v4i64 || MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 ||
+ MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
+ (Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
+ (Subtarget.hasBWI() && Subtarget.hasVLX() &&
+ (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16)) ||
+ (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
+ (Subtarget.hasVBMI() && Subtarget.hasVLX() &&
+ (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8)))) {
+ SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
+ V1 = DAG.getBitcast(MaskVT, V1);
+ V2 = DAG.getBitcast(MaskVT, V2);
+ Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, V1, VPermMask, V2);
+ return DAG.getBitcast(RootVT, Res);
+ }
+
// Failed to find any combines.
return SDValue();
}
OpenPOWER on IntegriCloud