diff options
| -rw-r--r-- | llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 23 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/Utils/X86ShuffleDecode.h | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 27 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 18 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86MCInstLower.cpp | 27 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll | 134 |
8 files changed, 116 insertions, 121 deletions
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 6d42a101b0e..9aca2da4902 100644 --- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -287,4 +287,27 @@ void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { } } +void DecodeVPERMILPMask(const ConstantDataSequential *C, + SmallVectorImpl<int> &ShuffleMask) { + Type *MaskTy = C->getType(); + assert(MaskTy->isVectorTy() && "Expected a vector constant mask!"); + assert(MaskTy->getVectorElementType()->isIntegerTy() && + "Expected integer constant mask elements!"); + int ElementBits = MaskTy->getScalarSizeInBits(); + int NumElements = MaskTy->getVectorNumElements(); + assert((NumElements == 2 || NumElements == 4 || NumElements == 8) && + "Unexpected number of vector elements."); + assert((unsigned)NumElements == C->getNumElements() && + "Constant mask has a different number of elements!"); + + ShuffleMask.reserve(NumElements); + for (int i = 0; i < NumElements; ++i) { + int Base = (i * ElementBits / 128) * (128 / ElementBits); + uint64_t Element = C->getElementAsInteger(i); + // Only the least significant 2 bits of the integer are used. + int Index = Base + (Element & 0x3); + ShuffleMask.push_back(Index); + } +} + } // llvm namespace diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h index 03a843e7b8d..8034d209ac3 100644 --- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -84,6 +84,10 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, /// No VT provided since it only works on 256-bit, 4 element vectors. void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask); +/// \brief Decode a VPERMILP variable mask from an IR-level vector constant. +void DecodeVPERMILPMask(const ConstantDataSequential *C, + SmallVectorImpl<int> &ShuffleMask); + } // llvm namespace #endif diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2e195080f8b..40ab77aaaa0 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -9395,26 +9395,15 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, } // If we have a single input shuffle with different shuffle patterns in the - // two 128-bit lanes, just do two shuffles and blend them together. This will - // be faster than extracting the high 128-bit lane, shuffling it, and - // re-inserting it. Especially on newer processors where blending is *the* - // fastest operation. + // two 128-bit lanes use the variable mask to VPERMILPS. if (isSingleInputShuffleMask(Mask)) { - int LoMask[4] = {Mask[0], Mask[1], Mask[2], Mask[3]}; - int HiMask[4] = {Mask[4], Mask[5], Mask[6], Mask[7]}; - for (int &M : HiMask) - if (M >= 0) - M -= 4; - SDValue Lo = V1, Hi = V1; - if (!isNoopShuffleMask(LoMask)) - Lo = DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, Lo, - getV4X86ShuffleImm8ForMask(LoMask, DAG)); - if (!isNoopShuffleMask(HiMask)) - Hi = DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, Hi, - getV4X86ShuffleImm8ForMask(HiMask, DAG)); - unsigned BlendMask = 1 << 4 | 1 << 5 | 1 << 6 | 1 << 7; - return DAG.getNode(X86ISD::BLENDI, DL, MVT::v8f32, Lo, Hi, - DAG.getConstant(BlendMask, MVT::i8)); + SDValue VPermMask[8]; + for (int i = 0; i < 8; ++i) + VPermMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i32) + : DAG.getConstant(Mask[i], MVT::i32); + return DAG.getNode( + X86ISD::VPERMILPV, DL, MVT::v8f32, V1, + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask)); } // Shuffle the input elements into the desired positions in V1 and V2 and diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index a624fa25dab..a16cf4a0b64 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -343,6 +343,7 @@ namespace llvm { MOVSS, UNPCKL, UNPCKH, + VPERMILPV, VPERMILPI, VPERMV, VPERMV3, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 455991e4681..2badbb7d76b 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -188,6 +188,8 @@ def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, def SDTShuff3Op : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>; +def SDTShuff2OpM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisVec<2>]>; def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisInt<2>]>; def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, @@ -232,6 +234,7 @@ def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>; def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>; def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>; +def X86VPermilpv : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>; def X86VPermilpi : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>; def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>; def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 7a7ca8548a1..a186899d231 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -8418,6 +8418,15 @@ let ExeDomain = SSEPackedDouble in { } let Predicates = [HasAVX] in { +def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (v8i32 VR256:$src2))), + (VPERMILPSYrr VR256:$src1, VR256:$src2)>; +def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))), + (VPERMILPSYrm VR256:$src1, addr:$src2)>; +def : Pat<(v4f64 (X86VPermilpv VR256:$src1, (v4i64 VR256:$src2))), + (VPERMILPDYrr VR256:$src1, VR256:$src2)>; +def : Pat<(v4f64 (X86VPermilpv VR256:$src1, (loadv4i64 addr:$src2))), + (VPERMILPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86VPermilpi VR256:$src1, (i8 imm:$imm))), (VPERMILPSYri VR256:$src1, imm:$imm)>; def : Pat<(v4i64 (X86VPermilpi VR256:$src1, (i8 imm:$imm))), @@ -8428,6 +8437,15 @@ def : Pat<(v8i32 (X86VPermilpi (bc_v8i32 (loadv4i64 addr:$src1)), def : Pat<(v4i64 (X86VPermilpi (loadv4i64 addr:$src1), (i8 imm:$imm))), (VPERMILPDYmi addr:$src1, imm:$imm)>; +def : Pat<(v4f32 (X86VPermilpv VR128:$src1, (v4i32 VR128:$src2))), + (VPERMILPSrr VR128:$src1, VR128:$src2)>; +def : Pat<(v4f32 (X86VPermilpv VR128:$src1, (bc_v4i32 (loadv2i64 addr:$src2)))), + (VPERMILPSrm VR128:$src1, addr:$src2)>; +def : Pat<(v2f64 (X86VPermilpv VR128:$src1, (v2i64 VR128:$src2))), + (VPERMILPDrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2f64 (X86VPermilpv VR128:$src1, (loadv2i64 addr:$src2))), + (VPERMILPDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (X86VPermilpi VR128:$src1, (i8 imm:$imm))), (VPERMILPDri VR128:$src1, imm:$imm)>; def : Pat<(v2i64 (X86VPermilpi (loadv2i64 addr:$src1), (i8 imm:$imm))), diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index ded84fc28f1..5665a012606 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1022,15 +1022,19 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::PSHUFBrm: case X86::VPSHUFBrm: - // Lower PSHUFB normally but add a comment if we can find a constant - // shuffle mask. We won't be able to do this at the MC layer because the - // mask isn't an immediate. + case X86::VPERMILPSrm: + case X86::VPERMILPDrm: + case X86::VPERMILPSYrm: + case X86::VPERMILPDYrm: + // Lower PSHUFB and VPERMILP normally but add a comment if we can find + // a constant shuffle mask. We won't be able to do this at the MC layer + // because the mask isn't an immediate. std::string Comment; raw_string_ostream CS(Comment); SmallVector<int, 16> Mask; - assert(MI->getNumOperands() >= 6 && - "Wrong number of operands for PSHUFBrm or VPSHUFBrm"); + // All of these instructions accept a constant pool operand as their fifth. + assert(MI->getNumOperands() > 5 && "We should always have at least 5 operands!"); const MachineOperand &DstOp = MI->getOperand(0); const MachineOperand &SrcOp = MI->getOperand(1); const MachineOperand &MaskOp = MI->getOperand(5); @@ -1061,7 +1065,18 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { assert(MaskTy == C->getType() && "Expected a constant of the same type!"); - DecodePSHUFBMask(C, Mask); + switch (MI->getOpcode()) { + case X86::PSHUFBrm: + case X86::VPSHUFBrm: + DecodePSHUFBMask(C, Mask); + break; + case X86::VPERMILPSrm: + case X86::VPERMILPDrm: + case X86::VPERMILPSYrm: + case X86::VPERMILPDYrm: + DecodeVPERMILPMask(C, Mask); + } + assert(Mask.size() == MaskTy->getVectorNumElements() && "Shuffle mask has a different size than its type!"); } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll index e2f731b2af5..df40df2a325 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -381,9 +381,7 @@ define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_00015444 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[1,0,0,0,5,4,4,4] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,0,1,4,4,4,5] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,0,1,5,4,4,4] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4> ret <8 x float> %shuffle @@ -392,9 +390,7 @@ define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_00204644 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[0,2,0,0,4,6,4,4] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,0,4,4,6,4] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,0,4,6,4,4] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4> ret <8 x float> %shuffle @@ -403,9 +399,7 @@ define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_03004474 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[0,0,3,0,4,4,7,4] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,3,0,0,4,7,4,4] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,3,0,0,4,4,7,4] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4> ret <8 x float> %shuffle @@ -414,9 +408,7 @@ define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_10004444 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[0,0,0,0,4,4,4,4] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,0,0,0,5,4,4,4] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,0,0,0,4,4,4,4] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> ret <8 x float> %shuffle @@ -425,9 +417,7 @@ define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_22006446 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[2,0,0,2,6,4,4,6] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[2,2,0,0,6,6,4,4] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[2,2,0,0,6,4,4,6] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6> ret <8 x float> %shuffle @@ -436,9 +426,7 @@ define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_33307474 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[3,0,3,0,7,4,7,4] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[3,3,3,0,7,7,7,4] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[3,3,3,0,7,4,7,4] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4> ret <8 x float> %shuffle @@ -447,8 +435,7 @@ define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_32104567 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[3,2,1,0,7,6,5,4] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[3,2,1,0,4,5,6,7] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> ret <8 x float> %shuffle @@ -457,9 +444,7 @@ define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_00236744 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[2,3,0,0,6,7,4,4] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,3,4,4,6,7] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,3,6,7,4,4] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4> ret <8 x float> %shuffle @@ -468,9 +453,7 @@ define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_00226644 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[2,2,0,0,6,6,4,4] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,2,4,4,6,6] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,2,6,6,4,4] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4> ret <8 x float> %shuffle @@ -479,8 +462,7 @@ define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_10324567 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[1,0,3,2,5,4,7,6] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,0,3,2,4,5,6,7] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> ret <8 x float> %shuffle @@ -489,8 +471,7 @@ define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_11334567 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[1,1,3,3,5,5,7,7] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,1,3,3,4,5,6,7] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7> ret <8 x float> %shuffle @@ -499,8 +480,7 @@ define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_01235467 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[1,0,2,3,5,4,6,7] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,1,2,3,5,4,6,7] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> ret <8 x float> %shuffle @@ -509,8 +489,7 @@ define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_01235466 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[1,0,2,2,5,4,6,6] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,1,2,3,5,4,6,6] ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6> ret <8 x float> %shuffle @@ -519,9 +498,7 @@ define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_002u6u44 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[2,1,0,0,6,5,4,4] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,3,4,4,6,7] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4> ret <8 x float> %shuffle @@ -530,9 +507,7 @@ define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_00uu66uu ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[2,2,2,3,6,6,6,7] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,3,4,4,6,7] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef> ret <8 x float> %shuffle @@ -541,8 +516,7 @@ define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_103245uu ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[1,0,3,2,5,4,7,6] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef> ret <8 x float> %shuffle @@ -551,8 +525,7 @@ define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_1133uu67 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[1,1,3,3,5,5,7,7] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7> ret <8 x float> %shuffle @@ -561,8 +534,7 @@ define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_0uu354uu ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[1,0,2,3,5,4,6,7] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef> ret <8 x float> %shuffle @@ -571,8 +543,7 @@ define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) { define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) { ; ALL-LABEL: @shuffle_v8f32_uuu3uu66 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[0,1,2,2,4,5,6,6] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6> ret <8 x float> %shuffle @@ -956,9 +927,7 @@ define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_00015444 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[1,0,0,0,5,4,4,4] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,0,1,4,4,4,5] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,0,1,5,4,4,4] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4> ret <8 x i32> %shuffle @@ -967,9 +936,7 @@ define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_00204644 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[0,2,0,0,4,6,4,4] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,0,4,4,6,4] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,0,4,6,4,4] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4> ret <8 x i32> %shuffle @@ -978,9 +945,7 @@ define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_03004474 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[0,0,3,0,4,4,7,4] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,3,0,0,4,7,4,4] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,3,0,0,4,4,7,4] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4> ret <8 x i32> %shuffle @@ -989,9 +954,7 @@ define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_10004444 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[0,0,0,0,4,4,4,4] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,0,0,0,5,4,4,4] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,0,0,0,4,4,4,4] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4> ret <8 x i32> %shuffle @@ -1000,9 +963,7 @@ define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_22006446 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[2,0,0,2,6,4,4,6] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[2,2,0,0,6,6,4,4] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[2,2,0,0,6,4,4,6] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6> ret <8 x i32> %shuffle @@ -1011,9 +972,7 @@ define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_33307474 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[3,0,3,0,7,4,7,4] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[3,3,3,0,7,7,7,4] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[3,3,3,0,7,4,7,4] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4> ret <8 x i32> %shuffle @@ -1022,8 +981,7 @@ define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_32104567 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[3,2,1,0,7,6,5,4] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[3,2,1,0,4,5,6,7] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> ret <8 x i32> %shuffle @@ -1032,9 +990,7 @@ define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_00236744 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[2,3,0,0,6,7,4,4] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,3,4,4,6,7] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,3,6,7,4,4] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4> ret <8 x i32> %shuffle @@ -1043,9 +999,7 @@ define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_00226644 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[2,2,0,0,6,6,4,4] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,2,4,4,6,6] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,2,6,6,4,4] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4> ret <8 x i32> %shuffle @@ -1054,8 +1008,7 @@ define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_10324567 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[1,0,3,2,5,4,7,6] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,0,3,2,4,5,6,7] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> ret <8 x i32> %shuffle @@ -1064,8 +1017,7 @@ define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_11334567 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[1,1,3,3,5,5,7,7] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[1,1,3,3,4,5,6,7] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7> ret <8 x i32> %shuffle @@ -1074,8 +1026,7 @@ define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_01235467 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[1,0,2,3,5,4,6,7] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,1,2,3,5,4,6,7] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7> ret <8 x i32> %shuffle @@ -1084,8 +1035,7 @@ define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_01235466 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[1,0,2,2,5,4,6,6] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,1,2,3,5,4,6,6] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6> ret <8 x i32> %shuffle @@ -1094,9 +1044,7 @@ define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_002u6u44 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[2,1,0,0,6,5,4,4] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,3,4,4,6,7] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4> ret <8 x i32> %shuffle @@ -1105,9 +1053,7 @@ define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_00uu66uu ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[2,2,2,3,6,6,6,7] -; ALL-NEXT: vpermilps {{.*}} # ymm0 = ymm0[0,0,2,3,4,4,6,7] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef> ret <8 x i32> %shuffle @@ -1116,8 +1062,7 @@ define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_103245uu ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[1,0,3,2,5,4,7,6] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef> ret <8 x i32> %shuffle @@ -1126,8 +1071,7 @@ define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_1133uu67 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[1,1,3,3,5,5,7,7] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7> ret <8 x i32> %shuffle @@ -1136,8 +1080,7 @@ define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_0uu354uu ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[1,0,2,3,5,4,6,7] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef> ret <8 x i32> %shuffle @@ -1146,8 +1089,7 @@ define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: @shuffle_v8i32_uuu3uu66 ; ALL: # BB#0: -; ALL-NEXT: vpermilps {{.*}} # ymm1 = ymm0[0,1,2,2,4,5,6,6] -; ALL-NEXT: vblendps {{.*}} # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; ALL-NEXT: vpermilps {{.*}}, %ymm0, %ymm0 ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6> ret <8 x i32> %shuffle |

