diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 22 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-vpermil.ll | 2 | 
2 files changed, 18 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e35d9e98130..5987e57a693 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3159,7 +3159,13 @@ static bool isCommutedSHUFP(ShuffleVectorSDNode *N) {  /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand  /// specifies a shuffle of elements that is suitable for input to MOVHLPS.  bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { -  if (N->getValueType(0).getVectorNumElements() != 4) +  EVT VT = N->getValueType(0); +  unsigned NumElems = VT.getVectorNumElements(); + +  if (VT.getSizeInBits() != 128) +    return false; + +  if (NumElems != 4)      return false;    // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 @@ -3173,15 +3179,19 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {  /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,  /// <2, 3, 2, 3>  bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { -  unsigned NumElems = N->getValueType(0).getVectorNumElements(); +  EVT VT = N->getValueType(0); +  unsigned NumElems = VT.getVectorNumElements(); + +  if (VT.getSizeInBits() != 128) +    return false;    if (NumElems != 4)      return false;    return isUndefOrEqual(N->getMaskElt(0), 2) && -  isUndefOrEqual(N->getMaskElt(1), 3) && -  isUndefOrEqual(N->getMaskElt(2), 2) && -  isUndefOrEqual(N->getMaskElt(3), 3); +         isUndefOrEqual(N->getMaskElt(1), 3) && +         isUndefOrEqual(N->getMaskElt(2), 2) && +         isUndefOrEqual(N->getMaskElt(3), 3);  }  /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand @@ -3501,6 +3511,8 @@ static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {        int MaskElt = SVOp->getMaskElt(i+(l*LaneSize));        if (MaskElt < 0)          continue; +      if (MaskElt >= LaneSize) +        MaskElt -= LaneSize;        Mask |= MaskElt << (i*2);      }    } diff --git a/llvm/test/CodeGen/X86/avx-vpermil.ll b/llvm/test/CodeGen/X86/avx-vpermil.ll index a3e916dc1eb..f83a4450cf4 100644 --- a/llvm/test/CodeGen/X86/avx-vpermil.ll +++ b/llvm/test/CodeGen/X86/avx-vpermil.ll @@ -31,7 +31,7 @@ entry:  ; vpermil should match masks like this: <u,3,1,2,4,u,5,6>. Check that the  ; target specific mask was correctly generated.  ; CHECK: vpermilps $-100 -define <8 x float> @funcA(<8 x float> %a) nounwind uwtable readnone ssp { +define <8 x float> @funcE(<8 x float> %a) nounwind uwtable readnone ssp {  entry:    %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 8, i32 3, i32 1, i32 2, i32 4, i32 8, i32 5, i32 6>    ret <8 x float> %shuffle  | 

