diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-04-28 14:31:01 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-04-28 14:31:01 +0000 |
| commit | 22d1476bfa80a5e418ed3e60fda3a2ffcffe2e12 (patch) | |
| tree | 934db32a2636441f265eebf016684e174f8dfda6 /llvm/lib | |
| parent | ce8cfe96f76fdc0c8c2f7b0fe0cd25a2990f6f44 (diff) | |
| download | bcm5719-llvm-22d1476bfa80a5e418ed3e60fda3a2ffcffe2e12.tar.gz bcm5719-llvm-22d1476bfa80a5e418ed3e60fda3a2ffcffe2e12.zip | |
[X86][AVX] Combine non-lane crossing binary shuffles using X86ISD::VPERMV3
Some of the combines might be further improved if we lower more shuffles with X86ISD::VPERMV3 directly, instead of waiting to combine the results.
llvm-svn: 359400
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 22 |
1 files changed, 22 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 535a7b27b62..32e3a80d5c7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31874,6 +31874,28 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
     }
   }
 
+  // If we have a dual input shuffle then lower to VPERMV3.
+  if (!UnaryShuffle && AllowVariableMask && !MaskContainsZeros &&
+      ((Subtarget.hasAVX512() &&
+        (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
+         MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
+       (Subtarget.hasVLX() &&
+        (MaskVT == MVT::v2f64 || MaskVT == MVT::v2i64 || MaskVT == MVT::v4f64 ||
+         MaskVT == MVT::v4i64 || MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 ||
+         MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
+       (Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
+       (Subtarget.hasBWI() && Subtarget.hasVLX() &&
+        (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16)) ||
+       (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
+       (Subtarget.hasVBMI() && Subtarget.hasVLX() &&
+        (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8)))) {
+    SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
+    V1 = DAG.getBitcast(MaskVT, V1);
+    V2 = DAG.getBitcast(MaskVT, V2);
+    Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, V1, VPermMask, V2);
+    return DAG.getBitcast(RootVT, Res);
+  }
+
   // Failed to find any combines.
   return SDValue();
 }
```

