| Field     | Value                                    | Date                      |
|-----------|------------------------------------------|---------------------------|
| author    | Simon Pilgrim <llvm-dev@redking.me.uk>   | 2017-10-03 09:41:00 +0000 |
| committer | Simon Pilgrim <llvm-dev@redking.me.uk>   | 2017-10-03 09:41:00 +0000 |
| commit    | 19d535e75bc13de7e0f8dd51124de6cc1114332d |                           |
| tree      | 3627648a1e1026621ebd144022148224d4998abb |                           |
| parent    | e485b143ead5988e0051f1babe4600700923d3b3 |                           |
[X86][SSE] Add support for PACKSS/PACKUS constant folding
Pulled out of D38472
llvm-svn: 314776
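For context, the fold applies the same per-element saturation rules as the hardware PACK instructions: PACKSS truncates with signed saturation, PACKUS with unsigned saturation. A minimal scalar sketch of the word-to-byte case (standalone C++, not part of this patch; the `PackSS`/`PackUS` names are illustrative):

```cpp
#include <algorithm>
#include <cstdint>

// PACKSSWB-style: truncate i16 -> i8 with signed saturation.
// Values below INT8_MIN clamp to -128, values above INT8_MAX clamp to 127.
static int8_t PackSS(int16_t V) {
  return static_cast<int8_t>(
      std::min<int16_t>(std::max<int16_t>(V, INT8_MIN), INT8_MAX));
}

// PACKUSWB-style: truncate i16 -> i8 with unsigned saturation.
// Negative values clamp to 0, values above UINT8_MAX clamp to 255.
static uint8_t PackUS(int16_t V) {
  return static_cast<uint8_t>(
      std::min<int16_t>(std::max<int16_t>(V, 0), UINT8_MAX));
}
```

For the packsswb/packuswb test inputs below, e.g. `PackSS(-255)` yields -128 (printed as 128 in the CHECK lines) and `PackUS(-255)` yields 0, matching the folded constant vectors in the updated tests.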
| Mode       | Path                                          | Lines changed |
|------------|-----------------------------------------------|---------------|
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp       | 85            |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll  | 44            |
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll  | 54            |
| -rw-r--r-- | llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll | 18            |

4 files changed, 133 insertions, 68 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0981d39fe5c..3dd4d74ca40 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5350,6 +5350,13 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
     return false;
   };
 
+  // Handle UNDEFs.
+  if (Op.isUndef()) {
+    APInt UndefSrcElts = APInt::getAllOnesValue(NumElts);
+    SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0));
+    return CastBitData(UndefSrcElts, SrcEltBits);
+  }
+
   // Extract constant bits from build vector.
   if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
     unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
@@ -31838,6 +31845,82 @@ static SDValue combineShift(SDNode* N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const X86Subtarget &Subtarget) {
+  unsigned Opcode = N->getOpcode();
+  assert((X86ISD::PACKSS == Opcode || X86ISD::PACKUS == Opcode) &&
+         "Unexpected pack opcode");
+
+  EVT VT = N->getValueType(0);
+  EVT SVT = VT.getScalarType();
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  unsigned DstBitsPerElt = VT.getScalarSizeInBits();
+  unsigned SrcBitsPerElt = 2 * DstBitsPerElt;
+  assert(N0.getScalarValueSizeInBits() == SrcBitsPerElt &&
+         N1.getScalarValueSizeInBits() == SrcBitsPerElt &&
+         "Unexpected PACKSS/PACKUS input type");
+
+  // Constant Folding.
+  APInt UndefElts0, UndefElts1;
+  SmallVector<APInt, 32> EltBits0, EltBits1;
+  if ((N0->isUndef() || N->isOnlyUserOf(N0.getNode())) &&
+      (N1->isUndef() || N->isOnlyUserOf(N1.getNode())) &&
+      getTargetConstantBitsFromNode(N0, SrcBitsPerElt, UndefElts0, EltBits0) &&
+      getTargetConstantBitsFromNode(N1, SrcBitsPerElt, UndefElts1, EltBits1)) {
+    unsigned NumLanes = VT.getSizeInBits() / 128;
+    unsigned NumDstElts = VT.getVectorNumElements();
+    unsigned NumSrcElts = NumDstElts / 2;
+    unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
+    unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
+    bool IsSigned = (X86ISD::PACKSS == Opcode);
+
+    APInt Undefs(NumDstElts, 0);
+    SmallVector<APInt, 32> Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt));
+    for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+      for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
+        unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
+        auto &UndefElts = (Elt >= NumSrcEltsPerLane ? UndefElts1 : UndefElts0);
+        auto &EltBits = (Elt >= NumSrcEltsPerLane ? EltBits1 : EltBits0);
+
+        if (UndefElts[SrcIdx]) {
+          Undefs.setBit(Lane * NumDstEltsPerLane + Elt);
+          continue;
+        }
+
+        APInt &Val = EltBits[SrcIdx];
+        if (IsSigned) {
+          // PACKSS: Truncate signed value with signed saturation.
+          // Source values less than dst minint are saturated to minint.
+          // Source values greater than dst maxint are saturated to maxint.
+          if (Val.isSignedIntN(DstBitsPerElt))
+            Val = Val.trunc(DstBitsPerElt);
+          else if (Val.isNegative())
+            Val = APInt::getSignedMinValue(DstBitsPerElt);
+          else
+            Val = APInt::getSignedMaxValue(DstBitsPerElt);
+        } else {
+          // PACKUS: Truncate signed value with unsigned saturation.
+          // Source values less than zero are saturated to zero.
+          // Source values greater than dst maxuint are saturated to maxuint.
+          if (Val.isIntN(DstBitsPerElt))
+            Val = Val.trunc(DstBitsPerElt);
+          else if (Val.isNegative())
+            Val = APInt::getNullValue(DstBitsPerElt);
+          else
+            Val = APInt::getAllOnesValue(DstBitsPerElt);
+        }
+        Bits[Lane * NumDstEltsPerLane + Elt] = Val;
+      }
+    }
+
+    return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));
+  }
+
+  return SDValue();
+}
+
 static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const X86Subtarget &Subtarget) {
@@ -36069,6 +36152,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SETCC:          return combineSetCC(N, DAG, Subtarget);
   case X86ISD::SETCC:       return combineX86SetCC(N, DAG, Subtarget);
   case X86ISD::BRCOND:      return combineBrCond(N, DAG, Subtarget);
+  case X86ISD::PACKSS:
+  case X86ISD::PACKUS:      return combineVectorPack(N, DAG, DCI, Subtarget);
   case X86ISD::VSHLI:
   case X86ISD::VSRAI:
   case X86ISD::VSRLI:
diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
index 7c9874e9a48..1329c243924 100644
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -21,15 +21,15 @@ declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
 define <16 x i16> @test_x86_avx2_packssdw_fold() {
 ; AVX2-LABEL: test_x86_avx2_packssdw_fold:
 ; AVX2:       ## BB#0:
-; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; AVX2-NEXT:    vpackssdw LCPI1_0, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0x05,A,A,A,A]
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
+; AVX2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
 ; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI1_0, kind: FK_Data_4
 ; AVX2-NEXT:    retl ## encoding: [0xc3]
 ;
 ; AVX512VL-LABEL: test_x86_avx2_packssdw_fold:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; AVX512VL-NEXT:    vpackssdw LCPI1_0, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x05,A,A,A,A]
+; AVX512VL-NEXT:    vmovaps LCPI1_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
+; AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
 ; AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI1_0, kind: FK_Data_4
 ; AVX512VL-NEXT:    retl ## encoding: [0xc3]
   %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
@@ -56,20 +56,16 @@ declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
 define <32 x i8> @test_x86_avx2_packsswb_fold() {
 ; AVX2-LABEL: test_x86_avx2_packsswb_fold:
 ; AVX2:       ## BB#0:
-; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,255,256,65535,65535,65281,65280,32858,0,255,256,65535,65535,65281,65280,32858]
-; AVX2-NEXT:    ## encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
+; AVX2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
 ; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI3_0, kind: FK_Data_4
-; AVX2-NEXT:    vpacksswb %ymm0, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0x63,0xc0]
 ; AVX2-NEXT:    retl ## encoding: [0xc3]
 ;
 ; AVX512VL-LABEL: test_x86_avx2_packsswb_fold:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; AVX512VL-NEXT:    vmovdqa LCPI3_0, %ymm1 ## EVEX TO VEX Compression ymm1 = [0,255,256,65535,65535,65281,65280,32858,0,255,256,65535,65535,65281,65280,32858]
-; AVX512VL-NEXT:    ## encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
+; AVX512VL-NEXT:    vmovaps LCPI3_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
 ; AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI3_0, kind: FK_Data_4
-; AVX512VL-NEXT:    vpacksswb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x63,0xc0]
 ; AVX512VL-NEXT:    retl ## encoding: [0xc3]
   %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
   ret <32 x i8> %res
@@ -95,20 +91,16 @@ declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
 define <32 x i8> @test_x86_avx2_packuswb_fold() {
 ; AVX2-LABEL: test_x86_avx2_packuswb_fold:
 ; AVX2:       ## BB#0:
-; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,255,256,65535,65535,65281,65280,32858,0,255,256,65535,65535,65281,65280,32858]
-; AVX2-NEXT:    ## encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
 ; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI5_0, kind: FK_Data_4
-; AVX2-NEXT:    vpackuswb %ymm0, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0x67,0xc0]
 ; AVX2-NEXT:    retl ## encoding: [0xc3]
 ;
 ; AVX512VL-LABEL: test_x86_avx2_packuswb_fold:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; AVX512VL-NEXT:    vmovdqa LCPI5_0, %ymm1 ## EVEX TO VEX Compression ymm1 = [0,255,256,65535,65535,65281,65280,32858,0,255,256,65535,65535,65281,65280,32858]
-; AVX512VL-NEXT:    ## encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
+; AVX512VL-NEXT:    vmovaps LCPI5_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
 ; AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI5_0, kind: FK_Data_4
-; AVX512VL-NEXT:    vpackuswb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x67,0xc0]
 ; AVX512VL-NEXT:    retl ## encoding: [0xc3]
   %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
   ret <32 x i8> %res
@@ -850,16 +842,16 @@ declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
 define <16 x i16> @test_x86_avx2_packusdw_fold() {
 ; AVX2-LABEL: test_x86_avx2_packusdw_fold:
 ; AVX2:       ## BB#0:
-; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; AVX2-NEXT:    vpackusdw LCPI55_0, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x2b,0x05,A,A,A,A]
-; AVX2-NEXT:    ## fixup A - offset: 5, value: LCPI55_0, kind: FK_Data_4
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
+; AVX2-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
+; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI55_0, kind: FK_Data_4
 ; AVX2-NEXT:    retl ## encoding: [0xc3]
 ;
 ; AVX512VL-LABEL: test_x86_avx2_packusdw_fold:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; AVX512VL-NEXT:    vpackusdw LCPI55_0, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x05,A,A,A,A]
-; AVX512VL-NEXT:    ## fixup A - offset: 5, value: LCPI55_0, kind: FK_Data_4
+; AVX512VL-NEXT:    vmovaps LCPI55_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
+; AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
+; AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI55_0, kind: FK_Data_4
 ; AVX512VL-NEXT:    retl ## encoding: [0xc3]
   %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
   ret <16 x i16> %res
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
index e6d3057fc5d..7324e855088 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -684,22 +684,22 @@ declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
 define <8 x i16> @test_x86_sse2_packssdw_128_fold() {
 ; SSE-LABEL: test_x86_sse2_packssdw_128_fold:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    pxor %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xef,0xc0]
-; SSE-NEXT:    packssdw LCPI32_0, %xmm0 ## encoding: [0x66,0x0f,0x6b,0x05,A,A,A,A]
-; SSE-NEXT:    ## fixup A - offset: 4, value: LCPI32_0, kind: FK_Data_4
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
+; SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
+; SSE-NEXT:    ## fixup A - offset: 3, value: LCPI32_0, kind: FK_Data_4
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; AVX2-LABEL: test_x86_sse2_packssdw_128_fold:
 ; AVX2:       ## BB#0:
-; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; AVX2-NEXT:    vpackssdw LCPI32_0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0x05,A,A,A,A]
+; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,32767,32767,65535,32768]
+; AVX2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
 ; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI32_0, kind: FK_Data_4
 ; AVX2-NEXT:    retl ## encoding: [0xc3]
 ;
 ; SKX-LABEL: test_x86_sse2_packssdw_128_fold:
 ; SKX:       ## BB#0:
-; SKX-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; SKX-NEXT:    vpackssdw LCPI32_0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x05,A,A,A,A]
+; SKX-NEXT:    vmovaps LCPI32_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,32767,32767,65535,32768]
+; SKX-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
 ; SKX-NEXT:    ## fixup A - offset: 4, value: LCPI32_0, kind: FK_Data_4
 ; SKX-NEXT:    retl ## encoding: [0xc3]
   %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>)
@@ -731,29 +731,23 @@ declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
 define <16 x i8> @test_x86_sse2_packsswb_128_fold() {
 ; SSE-LABEL: test_x86_sse2_packsswb_128_fold:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    pxor %xmm1, %xmm1 ## encoding: [0x66,0x0f,0xef,0xc9]
-; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [0,255,256,65535,65535,65281,65280,32858]
-; SSE-NEXT:    ## encoding: [0x66,0x0f,0x6f,0x05,A,A,A,A]
-; SSE-NEXT:    ## fixup A - offset: 4, value: LCPI34_0, kind: FK_Data_4
-; SSE-NEXT:    packsswb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x63,0xc1]
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
+; SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
+; SSE-NEXT:    ## fixup A - offset: 3, value: LCPI34_0, kind: FK_Data_4
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; AVX2-LABEL: test_x86_sse2_packsswb_128_fold:
 ; AVX2:       ## BB#0:
-; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,255,256,65535,65535,65281,65280,32858]
-; AVX2-NEXT:    ## encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
+; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
+; AVX2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
 ; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI34_0, kind: FK_Data_4
-; AVX2-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0 ## encoding: [0xc5,0xf1,0x63,0xc0]
 ; AVX2-NEXT:    retl ## encoding: [0xc3]
 ;
 ; SKX-LABEL: test_x86_sse2_packsswb_128_fold:
 ; SKX:       ## BB#0:
-; SKX-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; SKX-NEXT:    vmovdqa LCPI34_0, %xmm1 ## EVEX TO VEX Compression xmm1 = [0,255,256,65535,65535,65281,65280,32858]
-; SKX-NEXT:    ## encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
+; SKX-NEXT:    vmovaps LCPI34_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
+; SKX-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
 ; SKX-NEXT:    ## fixup A - offset: 4, value: LCPI34_0, kind: FK_Data_4
-; SKX-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x63,0xc0]
 ; SKX-NEXT:    retl ## encoding: [0xc3]
   %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
   ret <16 x i8> %res
@@ -784,29 +778,23 @@ declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
 define <16 x i8> @test_x86_sse2_packuswb_128_fold() {
 ; SSE-LABEL: test_x86_sse2_packuswb_128_fold:
 ; SSE:       ## BB#0:
-; SSE-NEXT:    pxor %xmm1, %xmm1 ## encoding: [0x66,0x0f,0xef,0xc9]
-; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [0,255,256,65535,65535,65281,65280,32858]
-; SSE-NEXT:    ## encoding: [0x66,0x0f,0x6f,0x05,A,A,A,A]
-; SSE-NEXT:    ## fixup A - offset: 4, value: LCPI36_0, kind: FK_Data_4
-; SSE-NEXT:    packuswb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x67,0xc1]
+; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
+; SSE-NEXT:    ## fixup A - offset: 3, value: LCPI36_0, kind: FK_Data_4
 ; SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; AVX2-LABEL: test_x86_sse2_packuswb_128_fold:
 ; AVX2:       ## BB#0:
-; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,255,256,65535,65535,65281,65280,32858]
-; AVX2-NEXT:    ## encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
+; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
 ; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI36_0, kind: FK_Data_4
-; AVX2-NEXT:    vpackuswb %xmm0, %xmm1, %xmm0 ## encoding: [0xc5,0xf1,0x67,0xc0]
 ; AVX2-NEXT:    retl ## encoding: [0xc3]
 ;
 ; SKX-LABEL: test_x86_sse2_packuswb_128_fold:
 ; SKX:       ## BB#0:
-; SKX-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; SKX-NEXT:    vmovdqa LCPI36_0, %xmm1 ## EVEX TO VEX Compression xmm1 = [0,255,256,65535,65535,65281,65280,32858]
-; SKX-NEXT:    ## encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
+; SKX-NEXT:    vmovaps LCPI36_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; SKX-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
 ; SKX-NEXT:    ## fixup A - offset: 4, value: LCPI36_0, kind: FK_Data_4
-; SKX-NEXT:    vpackuswb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x67,0xc0]
 ; SKX-NEXT:    retl ## encoding: [0xc3]
   %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
   ret <16 x i8> %res
diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
index 39acde0b5cd..98300a526a9 100644
--- a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
@@ -138,23 +138,23 @@ declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
 define <8 x i16> @test_x86_sse41_packusdw_fold() {
 ; SSE41-LABEL: test_x86_sse41_packusdw_fold:
 ; SSE41:       ## BB#0:
-; SSE41-NEXT:    pxor %xmm0, %xmm0 ## encoding: [0x66,0x0f,0xef,0xc0]
-; SSE41-NEXT:    packusdw LCPI7_0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x2b,0x05,A,A,A,A]
-; SSE41-NEXT:    ## fixup A - offset: 5, value: LCPI7_0, kind: FK_Data_4
+; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,65535,65535,0,0]
+; SSE41-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
+; SSE41-NEXT:    ## fixup A - offset: 3, value: LCPI7_0, kind: FK_Data_4
 ; SSE41-NEXT:    retl ## encoding: [0xc3]
 ;
 ; AVX2-LABEL: test_x86_sse41_packusdw_fold:
 ; AVX2:       ## BB#0:
-; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; AVX2-NEXT:    vpackusdw LCPI7_0, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x2b,0x05,A,A,A,A]
-; AVX2-NEXT:    ## fixup A - offset: 5, value: LCPI7_0, kind: FK_Data_4
+; AVX2-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,65535,65535,0,0]
+; AVX2-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
+; AVX2-NEXT:    ## fixup A - offset: 4, value: LCPI7_0, kind: FK_Data_4
 ; AVX2-NEXT:    retl ## encoding: [0xc3]
 ;
 ; SKX-LABEL: test_x86_sse41_packusdw_fold:
 ; SKX:       ## BB#0:
-; SKX-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; SKX-NEXT:    vpackusdw LCPI7_0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x05,A,A,A,A]
-; SKX-NEXT:    ## fixup A - offset: 5, value: LCPI7_0, kind: FK_Data_4
+; SKX-NEXT:    vmovaps LCPI7_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,65535,65535,0,0]
+; SKX-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
+; SKX-NEXT:    ## fixup A - offset: 4, value: LCPI7_0, kind: FK_Data_4
 ; SKX-NEXT:    retl ## encoding: [0xc3]
   %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>)
   ret <8 x i16> %res
```
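One subtlety in `combineVectorPack` above is the per-128-bit-lane layout: within each lane, the low half of the destination elements is packed from the first operand and the high half from the second, and both halves index the same per-lane slots of their source. A standalone model of that index arithmetic (illustrative only, not LLVM code), shown for a 256-bit PACKSSWB:

```cpp
#include <cstdio>

// Mirrors the SrcIdx computation in combineVectorPack for v32i8 <- 2 x v16i16:
// two 128-bit lanes, 16 destination bytes per lane, 8 source words per lane.
int main() {
  const unsigned NumLanes = 2;
  const unsigned NumDstEltsPerLane = 16;
  const unsigned NumSrcEltsPerLane = 8;
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane)
    for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
      // Elements 0..7 of each lane read operand 0, elements 8..15 read
      // operand 1; both read source slot (Lane * 8 + Elt % 8).
      unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
      unsigned Operand = Elt >= NumSrcEltsPerLane ? 1 : 0;
      std::printf("dst[%2u] <- op%u[%2u]\n",
                  Lane * NumDstEltsPerLane + Elt, Operand, SrcIdx);
    }
}
```

This matches the folded vectors in the avx2 tests above: each 128-bit lane of the vpacksswb result holds eight saturated values from the constant operand followed by eight zeros from the zeroinitializer operand.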

