diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-08-05 13:02:53 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-08-05 13:02:53 +0000 |
commit | 7080005e6781f63fe71ac031515c93f942d9c177 (patch) | |
tree | 614d49cf1efbcd94597deec1cac8306a79d9daa9 /llvm/lib | |
parent | f72c663ac56e361e7b5808f64cd0c16d116db526 (diff) | |
download | bcm5719-llvm-7080005e6781f63fe71ac031515c93f942d9c177.tar.gz bcm5719-llvm-7080005e6781f63fe71ac031515c93f942d9c177.zip |
[X86][SSE] Consistently use the target shuffle root value type for vector size calculations. NFCI.
This is preparation for adding 2-input support, so we want to avoid unnecessary references to the input value type.
llvm-svn: 277814
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 23 |
1 files changed, 12 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f52a1ec5e01..35730c6161d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -25092,8 +25092,10 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root, MVT VT = Input.getSimpleValueType(); MVT RootVT = Root.getSimpleValueType(); - SDLoc DL(Root); + assert(VT.getSizeInBits() == RootVT.getSizeInBits() && + "Vector size mismatch"); + SDLoc DL(Root); SDValue Res; unsigned NumBaseMaskElts = BaseMask.size(); @@ -25106,6 +25108,8 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root, unsigned RootSizeInBits = RootVT.getSizeInBits(); unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts; + bool FloatDomain = VT.isFloatingPoint() || + (RootVT.is256BitVector() && !Subtarget.hasAVX2()); // Don't combine if we are a AVX512/EVEX target and the mask element size // is different from the root element size - this would prevent writemasks @@ -25122,12 +25126,11 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root, // TODO - handle 128/256-bit lane shuffles of 512-bit vectors. // Handle 128-bit lane shuffles of 256-bit vectors. - if (VT.is256BitVector() && NumBaseMaskElts == 2 && + if (RootVT.is256BitVector() && NumBaseMaskElts == 2 && !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) { if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128) return false; // Nothing to do! - MVT ShuffleVT = (VT.isFloatingPoint() || !Subtarget.hasAVX2() ? MVT::v4f64 - : MVT::v4i64); + MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64); unsigned PermMask = 0; PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0); PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4); @@ -25158,9 +25161,7 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root, unsigned MaskEltSizeInBits = RootSizeInBits / NumMaskElts; // Determine the effective mask value type. 
- bool FloatDomain = - (VT.isFloatingPoint() || (VT.is256BitVector() && !Subtarget.hasAVX2())) && - (32 <= MaskEltSizeInBits); + FloatDomain &= (32 <= MaskEltSizeInBits); MVT MaskVT = FloatDomain ? MVT::getFloatingPointVT(MaskEltSizeInBits) : MVT::getIntegerVT(MaskEltSizeInBits); MaskVT = MVT::getVectorVT(MaskVT, NumMaskElts); @@ -25265,11 +25266,11 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root, // instructions, but in practice PSHUFB tends to be *very* fast so we're // more aggressive. if ((Depth >= 3 || HasVariableMask) && - ((VT.is128BitVector() && Subtarget.hasSSSE3()) || - (VT.is256BitVector() && Subtarget.hasAVX2()) || - (VT.is512BitVector() && Subtarget.hasBWI()))) { + ((RootVT.is128BitVector() && Subtarget.hasSSSE3()) || + (RootVT.is256BitVector() && Subtarget.hasAVX2()) || + (RootVT.is512BitVector() && Subtarget.hasBWI()))) { SmallVector<SDValue, 16> PSHUFBMask; - int NumBytes = VT.getSizeInBits() / 8; + int NumBytes = RootVT.getSizeInBits() / 8; int Ratio = NumBytes / NumMaskElts; for (int i = 0; i < NumBytes; ++i) { int M = Mask[i / Ratio]; |