author     Sanjay Patel <spatel@rotateright.com>    2019-04-22 22:43:36 +0000
committer  Sanjay Patel <spatel@rotateright.com>    2019-04-22 22:43:36 +0000
commit     bf8aacb7151cf3ed569c4907c934c45244f6914f
tree       3e02f7cf65f7b7fab3a8796d767203fac5fb1b99 /llvm
parent     a38b8c8abce0233e7ffc8d9d8de2436505877d59
[SelectionDAG] move splat util functions up from x86 lowering
This was supposed to be NFC, but the change in SDLoc
definitions causes instruction scheduling changes.
There's nothing x86-specific in this code, and it can
likely be used from DAGCombiner's simplifyVBinOp().
llvm-svn: 358930
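
Not part of the patch, but a minimal sketch of the intended generic usage, e.g. from a DAGCombiner-style vector-binop fold. The combine function and local names below are hypothetical; only getSplatSourceVector() and getSplatValue() come from this change. Note that getSplatValue() creates the EXTRACT_VECTOR_ELT at SDLoc(V), the splat's own location, rather than at a caller-supplied SDLoc as the old X86 GetSplatValue() did; that SDLoc difference is what the message above blames for the instruction scheduling changes.

```cpp
// Illustration only (not in this commit): how generic DAG code might use the
// hoisted helpers. combineSplatBinop and its locals are hypothetical names.
static SDValue combineSplatBinop(SDNode *N, SelectionDAG &DAG) {
  SDValue Amt = N->getOperand(1);

  // Generic replacement for X86's old IsSplatVector(): find the vector that
  // feeds the splat and which of its elements is broadcast.
  int SplatIdx;
  if (!DAG.getSplatSourceVector(Amt, SplatIdx))
    return SDValue();

  // Generic replacement for X86's old GetSplatValue(): materialize the
  // splatted scalar as (extract_vector_elt Src, SplatIdx) at SDLoc(Amt).
  if (SDValue Scalar = DAG.getSplatValue(Amt)) {
    // ... rewrite the vector op in terms of the uniform scalar amount ...
    (void)Scalar;
  }
  return SDValue();
}
```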
Diffstat (limited to 'llvm')
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAG.h        |  7
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAGNodes.h   |  4
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp  | 52
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp         | 57
-rw-r--r--  llvm/test/CodeGen/X86/vector-rotate-256.ll      | 10
-rw-r--r--  llvm/test/CodeGen/X86/vector-rotate-512.ll      | 16
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-ashr-256.ll  | 16
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-lshr-256.ll  | 16
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-shl-256.ll   | 16
9 files changed, 101 insertions, 93 deletions
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index aac3e0f0c66..f272c595d87 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1539,6 +1539,13 @@ public:
   /// Test whether \p V has a splatted value.
   bool isSplatValue(SDValue V, bool AllowUndefs = false);
 
+  /// If V is a splatted value, return the source vector and its splat index.
+  SDValue getSplatSourceVector(SDValue V, int &SplatIndex);
+
+  /// If V is a splat vector, return its scalar source operand by extracting
+  /// that element from the source vector.
+  SDValue getSplatValue(SDValue V);
+
   /// Match a binop + shuffle pyramid that represents a horizontal reduction
   /// over the elements of a vector starting from the EXTRACT_VECTOR_ELT node /p
   /// Extract. The reduction must use one of the opcodes listed in /p
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index d9f930d725a..67fc3e99a1f 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1641,6 +1641,10 @@ SDValue peekThroughBitcasts(SDValue V);
 /// If \p V is not a bitcasted one-use value, it is returned as-is.
 SDValue peekThroughOneUseBitcasts(SDValue V);
 
+/// Return the non-extracted vector source operand of \p V if it exists.
+/// If \p V is not an extracted subvector, it is returned as-is.
+SDValue peekThroughExtractSubvectors(SDValue V);
+
 /// Returns true if \p V is a bitwise not operation. Assumes that an all ones
 /// constant is canonicalized to be operand 1.
 bool isBitwiseNot(SDValue V);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b2de2a4f343..4b21b96c9df 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2300,6 +2300,52 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
          (AllowUndefs || !UndefElts);
 }
 
+SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
+  V = peekThroughExtractSubvectors(V);
+
+  EVT VT = V.getValueType();
+  unsigned Opcode = V.getOpcode();
+  switch (Opcode) {
+  default: {
+    APInt UndefElts;
+    APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+    if (isSplatValue(V, DemandedElts, UndefElts)) {
+      // Handle case where all demanded elements are UNDEF.
+      if (DemandedElts.isSubsetOf(UndefElts)) {
+        SplatIdx = 0;
+        return getUNDEF(VT);
+      }
+      SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
+      return V;
+    }
+    break;
+  }
+  case ISD::VECTOR_SHUFFLE: {
+    // Check if this is a shuffle node doing a splat.
+    // TODO - remove this and rely purely on SelectionDAG::isSplatValue,
+    // getTargetVShiftNode currently struggles without the splat source.
+    auto *SVN = cast<ShuffleVectorSDNode>(V);
+    if (!SVN->isSplat())
+      break;
+    int Idx = SVN->getSplatIndex();
+    int NumElts = V.getValueType().getVectorNumElements();
+    SplatIdx = Idx % NumElts;
+    return V.getOperand(Idx / NumElts);
+  }
+  }
+
+  return SDValue();
+}
+
+SDValue SelectionDAG::getSplatValue(SDValue V) {
+  int SplatIdx;
+  if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx))
+    return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V),
+                   SrcVector.getValueType().getScalarType(), SrcVector,
+                   getIntPtrConstant(SplatIdx, SDLoc(V)));
+  return SDValue();
+}
+
 /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
 /// is less than the element bit-width of the shift node, return it.
 static const APInt *getValidShiftAmountConstant(SDValue V) {
@@ -8585,6 +8631,12 @@ SDValue llvm::peekThroughOneUseBitcasts(SDValue V) {
   return V;
 }
 
+SDValue llvm::peekThroughExtractSubvectors(SDValue V) {
+  while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+    V = V.getOperand(0);
+  return V;
+}
+
 bool llvm::isBitwiseNot(SDValue V) {
   if (V.getOpcode() != ISD::XOR)
     return false;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2534948bb5c..f13e1147dd9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5711,13 +5711,6 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
   return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
 }
 
-// Peek through EXTRACT_SUBVECTORs - typically used for AVX1 256-bit intops.
-static SDValue peekThroughEXTRACT_SUBVECTORs(SDValue V) {
-  while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
-    V = V.getOperand(0);
-  return V;
-}
-
 static const Constant *getTargetConstantFromNode(SDValue Op) {
   Op = peekThroughBitcasts(Op);
 
@@ -24722,54 +24715,6 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
   return SDValue();
 }
 
-// If V is a splat value, return the source vector and splat index;
-static SDValue IsSplatVector(SDValue V, int &SplatIdx, SelectionDAG &DAG) {
-  V = peekThroughEXTRACT_SUBVECTORs(V);
-
-  EVT VT = V.getValueType();
-  unsigned Opcode = V.getOpcode();
-  switch (Opcode) {
-  default: {
-    APInt UndefElts;
-    APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
-    if (DAG.isSplatValue(V, DemandedElts, UndefElts)) {
-      // Handle case where all demanded elements are UNDEF.
-      if (DemandedElts.isSubsetOf(UndefElts)) {
-        SplatIdx = 0;
-        return DAG.getUNDEF(VT);
-      }
-      SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
-      return V;
-    }
-    break;
-  }
-  case ISD::VECTOR_SHUFFLE: {
-    // Check if this is a shuffle node doing a splat.
-    // TODO - remove this and rely purely on SelectionDAG::isSplatValue,
-    // getTargetVShiftNode currently struggles without the splat source.
-    auto *SVN = cast<ShuffleVectorSDNode>(V);
-    if (!SVN->isSplat())
-      break;
-    int Idx = SVN->getSplatIndex();
-    int NumElts = V.getValueType().getVectorNumElements();
-    SplatIdx = Idx % NumElts;
-    return V.getOperand(Idx / NumElts);
-  }
-  }
-
-  return SDValue();
-}
-
-static SDValue GetSplatValue(SDValue V, const SDLoc &dl,
-                             SelectionDAG &DAG) {
-  int SplatIdx;
-  if (SDValue SrcVector = IsSplatVector(V, SplatIdx, DAG))
-    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
-                       SrcVector.getValueType().getScalarType(), SrcVector,
-                       DAG.getIntPtrConstant(SplatIdx, dl));
-  return SDValue();
-}
-
 static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
   MVT VT = Op.getSimpleValueType();
@@ -24780,7 +24725,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
   unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
   unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true);
 
-  if (SDValue BaseShAmt = GetSplatValue(Amt, dl, DAG)) {
+  if (SDValue BaseShAmt = DAG.getSplatValue(Amt)) {
     if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
       MVT EltVT = VT.getVectorElementType();
       assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index 3746c8e1283..ac50c36c530 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -527,11 +527,11 @@ define <4 x i64> @splatvar_rotate_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 ;
 ; AVX2-LABEL: splatvar_rotate_v4i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm2
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [64,64]
-; AVX2-NEXT:    vpsubq %xmm1, %xmm3, %xmm1
-; AVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpor %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [64,64]
+; AVX2-NEXT:    vpsubq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm1
+; AVX2-NEXT:    vpsrlq %xmm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: splatvar_rotate_v4i64:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index f1118b1e172..01b4676a1c4 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -344,10 +344,10 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
 ; AVX512BW-LABEL: splatvar_rotate_v32i16:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512BW-NEXT:    vpsllw %xmm2, %zmm0, %zmm2
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
 ; AVX512BW-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
 ; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512BW-NEXT:    vpsllw %xmm2, %zmm0, %zmm2
 ; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vporq %zmm0, %zmm2, %zmm0
 ; AVX512BW-NEXT:    retq
@@ -355,10 +355,10 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
 ; AVX512VLBW-LABEL: splatvar_rotate_v32i16:
 ; AVX512VLBW:       # %bb.0:
 ; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; AVX512VLBW-NEXT:    vpsllw %xmm2, %zmm0, %zmm2
 ; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
 ; AVX512VLBW-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
 ; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX512VLBW-NEXT:    vpsllw %xmm2, %zmm0, %zmm2
 ; AVX512VLBW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
 ; AVX512VLBW-NEXT:    vporq %zmm0, %zmm2, %zmm0
 ; AVX512VLBW-NEXT:    retq
@@ -426,14 +426,14 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512BW-LABEL: splatvar_rotate_v64i8:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX512BW-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
+; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512BW-NEXT:    vpsllw %xmm2, %zmm0, %zmm3
 ; AVX512BW-NEXT:    vpternlogd $255, %zmm4, %zmm4, %zmm4
 ; AVX512BW-NEXT:    vpsllw %xmm2, %zmm4, %zmm2
 ; AVX512BW-NEXT:    vpbroadcastb %xmm2, %zmm2
 ; AVX512BW-NEXT:    vpandq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512BW-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
-; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrlw %xmm1, %zmm4, %zmm1
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
@@ -445,14 +445,14 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512VLBW-LABEL: splatvar_rotate_v64i8:
 ; AVX512VLBW:       # %bb.0:
 ; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX512VLBW-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
+; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VLBW-NEXT:    vpsllw %xmm2, %zmm0, %zmm3
 ; AVX512VLBW-NEXT:    vpternlogd $255, %zmm4, %zmm4, %zmm4
 ; AVX512VLBW-NEXT:    vpsllw %xmm2, %zmm4, %zmm2
 ; AVX512VLBW-NEXT:    vpbroadcastb %xmm2, %zmm2
 ; AVX512VLBW-NEXT:    vpandq %zmm2, %zmm3, %zmm2
-; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512VLBW-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
-; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VLBW-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
 ; AVX512VLBW-NEXT:    vpsrlw %xmm1, %zmm4, %zmm1
 ; AVX512VLBW-NEXT:    vpsrlw $8, %zmm1, %zmm1
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index e895ac9ea45..fdf7f4aa109 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -737,8 +737,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v8i32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsrad %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -752,8 +752,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v8i32:
 ; XOPAVX1:       # %bb.0:
-; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpsrad %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -779,8 +779,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v8i32:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpsrad %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -799,8 +799,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v16i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsraw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -814,8 +814,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v16i16:
 ; XOPAVX1:       # %bb.0:
-; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpsraw %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -841,8 +841,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v16i16:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpsraw %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -861,8 +861,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v32i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsrlw %xmm1, %xmm3, %xmm3
@@ -968,8 +968,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v32i8:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
 ; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm3, %xmm3
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index 3212c78e5a7..ae087ba3618 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -580,8 +580,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v8i32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsrld %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -595,8 +595,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v8i32:
 ; XOPAVX1:       # %bb.0:
-; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpsrld %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -622,8 +622,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v8i32:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpsrld %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -642,8 +642,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v16i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -657,8 +657,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v16i16:
 ; XOPAVX1:       # %bb.0:
-; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -684,8 +684,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v16i16:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -704,8 +704,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v32i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsrlw %xmm1, %xmm3, %xmm3
@@ -793,8 +793,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v32i8:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
 ; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm3, %xmm3
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index dcf6630c201..bcd24b16795 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -512,8 +512,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v8i32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpslld %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -527,8 +527,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v8i32:
 ; XOPAVX1:       # %bb.0:
-; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpslld %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -554,8 +554,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v8i32:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpslld %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -574,8 +574,8 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v16i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -589,8 +589,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; XOPAVX1-LABEL: splatvar_shift_v16i16:
 ; XOPAVX1:       # %bb.0:
-; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; XOPAVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
 ; XOPAVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -616,8 +616,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v16i16:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
 ; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
@@ -636,8 +636,8 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX1-LABEL: splatvar_shift_v32i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsllw %xmm1, %xmm3, %xmm3
@@ -719,8 +719,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ;
 ; X32-AVX1-LABEL: splatvar_shift_v32i8:
 ; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X32-AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
 ; X32-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
 ; X32-AVX1-NEXT:    vpsllw %xmm1, %xmm3, %xmm3
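
A side note on the VECTOR_SHUFFLE case moved above: a shuffle mask indexes the concatenation of both source operands, which is why the splat index is decomposed with modulus and division. A small self-contained illustration, with values chosen for the example:

```cpp
#include <cassert>

int main() {
  // A shuffle of two v4i32 operands addresses the concatenation
  // <A0..A3, B0..B3> with mask indices 0-7. A splat of mask index 6 thus
  // broadcasts element 6 % 4 == 2 of operand 6 / 4 == 1 (i.e. B2), matching
  // the SplatIdx / getOperand computation in getSplatSourceVector.
  int Idx = 6, NumElts = 4;
  assert(Idx % NumElts == 2); // SplatIdx within the source vector
  assert(Idx / NumElts == 1); // which shuffle operand supplies the splat
  return 0;
}
```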

