Diffstat (limited to 'llvm')
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp |  26
 llvm/lib/Target/X86/X86RegisterInfo.cpp          |  15
 llvm/lib/Target/X86/X86RegisterInfo.h            |   5
 llvm/test/CodeGen/X86/bitcast-setcc-256.ll       |  16
 llvm/test/CodeGen/X86/bitcast-setcc-512.ll       |   8
 llvm/test/CodeGen/X86/bitcast-vector-bool.ll     |  24
 llvm/test/CodeGen/X86/dagcombine-cse.ll          |  13
 llvm/test/CodeGen/X86/movmsk-cmp.ll              | 112
 llvm/test/CodeGen/X86/pr41619.ll                 |  27
9 files changed, 107 insertions, 139 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 502913886e3..fc7e7d9c3ce 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1581,12 +1581,36 @@ bool TargetLowering::SimplifyDemandedBits(
       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                                KnownSrcBits, TLO, Depth + 1))
         return true;
+    } else if ((NumSrcEltBits % BitWidth) == 0 &&
+               TLO.DAG.getDataLayout().isLittleEndian()) {
+      unsigned Scale = NumSrcEltBits / BitWidth;
+      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+      for (unsigned i = 0; i != NumElts; ++i)
+        if (DemandedElts[i]) {
+          unsigned Offset = (i % Scale) * BitWidth;
+          DemandedSrcBits.insertBits(DemandedBits, Offset);
+          DemandedSrcElts.setBit(i / Scale);
+        }
+
+      if (SrcVT.isVector()) {
+        APInt KnownSrcUndef, KnownSrcZero;
+        if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
+                                       KnownSrcZero, TLO, Depth + 1))
+          return true;
+      }
+
+      KnownBits KnownSrcBits;
+      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
+                               KnownSrcBits, TLO, Depth + 1))
+        return true;
     }
 
     // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
     // recursive call where Known may be useful to the caller.
     if (Depth > 0) {
-      Known = TLO.DAG.computeKnownBits(Op, Depth);
+      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
       return false;
     }
     break;
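
The new else-if path handles wide-to-narrow vector bitcasts on little-endian targets: each source element spans Scale = NumSrcEltBits / BitWidth destination elements, so the demanded destination elements and bits can be translated back onto the source's elements and bits before recursing. A minimal sketch of the kind of pattern this covers (an illustrative function, not taken from the patch's tests):

; Only element 0 of the <4 x i32> view is demanded; on little-endian that is
; the low 32 bits of element 0 of the <2 x i64> source
; (Scale = 64 / 32 = 2, Offset = (0 % 2) * 32 = 0).
define i32 @demand_low_half(<2 x i64> %v) {
  %cast = bitcast <2 x i64> %v to <4 x i32>
  %elt = extractelement <4 x i32> %cast, i32 0
  ret i32 %elt
}
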
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 7dec87cdcb0..1e62958e722 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -216,6 +216,21 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
   }
 }
 
+bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+                                           unsigned DefSubReg,
+                                           const TargetRegisterClass *SrcRC,
+                                           unsigned SrcSubReg) const {
+  // Prevent rewriting a copy where the destination size is larger than the
+  // input size. See PR41619.
+  // FIXME: Should this be factored into the base implementation somehow.
+  if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 &&
+      SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit)
+    return false;
+
+  return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
+                                                  SrcRC, SrcSubReg);
+}
+
 const TargetRegisterClass *
 X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
   const Function &F = MF.getFunction();
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index d362f20b472..81e4920f6a5 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -74,6 +74,11 @@ public:
   getLargestLegalSuperClass(const TargetRegisterClass *RC,
                             const MachineFunction &MF) const override;
 
+  bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+                            unsigned DefSubReg,
+                            const TargetRegisterClass *SrcRC,
+                            unsigned SrcSubReg) const override;
+
   /// getPointerRegClass - Returns a TargetRegisterClass used for pointer
   /// values.
   const TargetRegisterClass *
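
The override keeps the peephole's copy-source rewriting from turning a read of a GR64's 32-bit subregister into a read of the full 64-bit register, whose upper half may be undefined; the new pr41619.ll test below exercises the original miscompile. As a hedged illustration (a hypothetical reduction, not part of the patch), the fragile shape starts from IR like this:

; Sketch only: assumes the double is moved to a GR64 and the trunc is then
; modeled as a sub_32bit read of that register; widening the copy back to the
; full GR64 would reintroduce the undefined upper 32 bits (PR41619).
define float @trunc_roundtrip(double %arg) {
  %i = bitcast double %arg to i64
  %t = trunc i64 %i to i32
  %f = bitcast i32 %t to float
  ret float %f
}
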
diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-256.ll b/llvm/test/CodeGen/X86/bitcast-setcc-256.ll
index 41635f37528..f9a233a583b 100644
--- a/llvm/test/CodeGen/X86/bitcast-setcc-256.ll
+++ b/llvm/test/CodeGen/X86/bitcast-setcc-256.ll
@@ -448,22 +448,6 @@ define void @bitcast_8i32_store(i8* %p, <8 x i32> %a0) {
 define void @bitcast_4i64_store(i4* %p, <4 x i64> %a0) {
 ; SSE2-SSSE3-LABEL: bitcast_4i64_store:
 ; SSE2-SSSE3:       # %bb.0:
-; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
-; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm1
-; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm3
-; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm3
-; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm4
-; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
-; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm1
-; SSE2-SSSE3-NEXT:    por %xmm4, %xmm1
-; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm0
-; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm3
-; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm3
-; SSE2-SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
-; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
-; SSE2-SSSE3-NEXT:    por %xmm2, %xmm0
 ; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
 ; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
 ; SSE2-SSSE3-NEXT:    movb %al, (%rdi)
diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll
index 3c294345dd5..177be1fd6a6 100644
--- a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll
+++ b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll
@@ -609,15 +609,13 @@ define void @bitcast_8i64_store(i8* %p, <8 x i64> %a0) {
 ;
 ; AVX1-LABEL: bitcast_8i64_store:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vmovmskps %ymm0, %eax
 ; AVX1-NEXT:    movb %al, (%rdi)
diff --git a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
index 0914f2bff9f..adcee2abe33 100644
--- a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
@@ -208,22 +208,6 @@ define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
 define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
 ; SSE2-SSSE3-LABEL: bitcast_v4i64_to_v2i2:
 ; SSE2-SSSE3:       # %bb.0:
-; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
-; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm1
-; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm3
-; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm3
-; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm4
-; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
-; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm1
-; SSE2-SSSE3-NEXT:    por %xmm4, %xmm1
-; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm0
-; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm3
-; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm3
-; SSE2-SSSE3-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
-; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
-; SSE2-SSSE3-NEXT:    por %xmm2, %xmm0
 ; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
 ; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
 ; SSE2-SSSE3-NEXT:    movl %eax, %ecx
@@ -532,15 +516,13 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
 ;
 ; AVX1-LABEL: bitcast_v8i64_to_v2i4:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vmovmskps %ymm0, %eax
 ; AVX1-NEXT:    movl %eax, %ecx
diff --git a/llvm/test/CodeGen/X86/dagcombine-cse.ll b/llvm/test/CodeGen/X86/dagcombine-cse.ll
index bf1dab35875..a532d87170d 100644
--- a/llvm/test/CodeGen/X86/dagcombine-cse.ll
+++ b/llvm/test/CodeGen/X86/dagcombine-cse.ll
@@ -14,18 +14,11 @@ define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) n
 ;
 ; X64-LABEL: t:
 ; X64:       ## %bb.0: ## %entry
-; X64-NEXT:    ## kill: def $edx killed $edx def $rdx
-; X64-NEXT:    ## kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    imull %ecx, %esi
-; X64-NEXT:    leal (%rsi,%rdx), %eax
-; X64-NEXT:    cltq
+; X64-NEXT:    addl %edx, %esi
+; X64-NEXT:    movslq %esi, %rax
 ; X64-NEXT:    movl (%rdi,%rax), %eax
-; X64-NEXT:    leal 4(%rsi,%rdx), %ecx
-; X64-NEXT:    movslq %ecx, %rcx
-; X64-NEXT:    movzwl (%rdi,%rcx), %ecx
-; X64-NEXT:    shlq $32, %rcx
-; X64-NEXT:    orq %rax, %rcx
-; X64-NEXT:    movq %rcx, %xmm0
+; X64-NEXT:    movq %rax, %xmm0
 ; X64-NEXT:    movd %xmm0, %eax
 ; X64-NEXT:    retq
 entry:
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 2e6123ff014..c8b3488af16 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -929,22 +929,6 @@ define i1 @allzeros_v16i32_sign(<16 x i32> %arg) {
 define i1 @allones_v4i64_sign(<4 x i64> %arg) {
 ; SSE2-LABEL: allones_v4i64_sign:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
-; SSE2-NEXT:    pxor %xmm2, %xmm1
-; SSE2-NEXT:    movdqa %xmm2, %xmm3
-; SSE2-NEXT:    pcmpeqd %xmm1, %xmm3
-; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    pcmpgtd %xmm1, %xmm4
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
-; SSE2-NEXT:    pand %xmm3, %xmm1
-; SSE2-NEXT:    por %xmm4, %xmm1
-; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    movdqa %xmm2, %xmm3
-; SSE2-NEXT:    pcmpeqd %xmm0, %xmm3
-; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
-; SSE2-NEXT:    pand %xmm3, %xmm0
-; SSE2-NEXT:    por %xmm2, %xmm0
 ; SSE2-NEXT:    packssdw %xmm1, %xmm0
 ; SSE2-NEXT:    movmskps %xmm0, %eax
 ; SSE2-NEXT:    cmpb $15, %al
@@ -989,22 +973,6 @@ define i1 @allones_v4i64_sign(<4 x i64> %arg) {
 define i1 @allzeros_v4i64_sign(<4 x i64> %arg) {
 ; SSE2-LABEL: allzeros_v4i64_sign:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
-; SSE2-NEXT:    pxor %xmm2, %xmm1
-; SSE2-NEXT:    movdqa %xmm2, %xmm3
-; SSE2-NEXT:    pcmpeqd %xmm1, %xmm3
-; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    pcmpgtd %xmm1, %xmm4
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
-; SSE2-NEXT:    pand %xmm3, %xmm1
-; SSE2-NEXT:    por %xmm4, %xmm1
-; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    movdqa %xmm2, %xmm3
-; SSE2-NEXT:    pcmpeqd %xmm0, %xmm3
-; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
-; SSE2-NEXT:    pand %xmm3, %xmm0
-; SSE2-NEXT:    por %xmm2, %xmm0
 ; SSE2-NEXT:    packssdw %xmm1, %xmm0
 ; SSE2-NEXT:    movmskps %xmm0, %eax
 ; SSE2-NEXT:    testb %al, %al
@@ -1095,15 +1063,13 @@ define i1 @allones_v8i64_sign(<8 x i64> %arg) {
 ;
 ; AVX1-LABEL: allones_v8i64_sign:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vmovmskps %ymm0, %eax
 ; AVX1-NEXT:    cmpb $-1, %al
@@ -1198,15 +1164,13 @@ define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
 ;
 ; AVX1-LABEL: allzeros_v8i64_sign:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vmovmskps %ymm0, %eax
 ; AVX1-NEXT:    testb %al, %al
@@ -2539,19 +2503,17 @@ define i1 @allones_v8i64_and1(<8 x i64> %arg) {
 ;
 ; AVX1-LABEL: allones_v8i64_and1:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpsllq $63, %xmm2, %xmm2
-; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpsllq $63, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsllq $63, %xmm2, %xmm2
+; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpsllq $63, %xmm0, %xmm0
 ; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpsllq $63, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllq $63, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vmovmskps %ymm0, %eax
 ; AVX1-NEXT:    cmpb $-1, %al
@@ -2615,19 +2577,17 @@ define i1 @allzeros_v8i64_and1(<8 x i64> %arg) {
 ;
 ; AVX1-LABEL: allzeros_v8i64_and1:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpsllq $63, %xmm2, %xmm2
-; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpsllq $63, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsllq $63, %xmm2, %xmm2
+; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpsllq $63, %xmm0, %xmm0
 ; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpsllq $63, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllq $63, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vmovmskps %ymm0, %eax
 ; AVX1-NEXT:    testb %al, %al
@@ -3962,19 +3922,17 @@ define i1 @allones_v8i64_and4(<8 x i64> %arg) {
 ;
 ; AVX1-LABEL: allones_v8i64_and4:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpsllq $61, %xmm2, %xmm2
-; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpsllq $61, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsllq $61, %xmm2, %xmm2
+; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpsllq $61, %xmm0, %xmm0
 ; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpsllq $61, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllq $61, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vmovmskps %ymm0, %eax
 ; AVX1-NEXT:    cmpb $-1, %al
@@ -4038,19 +3996,17 @@ define i1 @allzeros_v8i64_and4(<8 x i64> %arg) {
 ;
 ; AVX1-LABEL: allzeros_v8i64_and4:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpsllq $61, %xmm2, %xmm2
-; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpsllq $61, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsllq $61, %xmm2, %xmm2
+; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpsllq $61, %xmm0, %xmm0
 ; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpsllq $61, %xmm2, %xmm2
+; AVX1-NEXT:    vpsllq $61, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vmovmskps %ymm0, %eax
 ; AVX1-NEXT:    testb %al, %al
@@ -4170,22 +4126,6 @@ define i32 @movmskps(<4 x float> %x) {
 define i32 @movmskpd256(<4 x double> %x) {
 ; SSE2-LABEL: movmskpd256:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
-; SSE2-NEXT:    pxor %xmm2, %xmm1
-; SSE2-NEXT:    movdqa %xmm2, %xmm3
-; SSE2-NEXT:    pcmpeqd %xmm1, %xmm3
-; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    pcmpgtd %xmm1, %xmm4
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
-; SSE2-NEXT:    pand %xmm3, %xmm1
-; SSE2-NEXT:    por %xmm4, %xmm1
-; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    movdqa %xmm2, %xmm3
-; SSE2-NEXT:    pcmpeqd %xmm0, %xmm3
-; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
-; SSE2-NEXT:    pand %xmm3, %xmm0
-; SSE2-NEXT:    por %xmm2, %xmm0
 ; SSE2-NEXT:    packssdw %xmm1, %xmm0
 ; SSE2-NEXT:    movmskps %xmm0, %eax
 ; SSE2-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/pr41619.ll b/llvm/test/CodeGen/X86/pr41619.ll
new file mode 100644
index 00000000000..7c71f2c1c29
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr41619.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.14.0 -mattr=avx2 | FileCheck %s
+
+define void @foo(double %arg) {
+; CHECK-LABEL: foo:
+; CHECK:       ## %bb.0: ## %bb
+; CHECK-NEXT:    vmovq %xmm0, %rax
+; CHECK-NEXT:    vmovd %eax, %xmm0
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vmovq %xmm0, %rax
+; CHECK-NEXT:    movl %eax, (%rax)
+; CHECK-NEXT:    vmovlps %xmm1, (%rax)
+; CHECK-NEXT:    retq
+bb:
+  %tmp = bitcast double %arg to i64
+  %tmp1 = trunc i64 %tmp to i32
+  %tmp2 = bitcast i32 %tmp1 to float
+  %tmp3 = insertelement <4 x float> zeroinitializer, float %tmp2, i32 2
+  %tmp4 = bitcast <4 x float> %tmp3 to <2 x double>
+  %tmp5 = extractelement <2 x double> %tmp4, i32 0
+  %tmp6 = extractelement <2 x double> %tmp4, i32 1
+  %tmp7 = bitcast double %tmp6 to i64
+  %tmp8 = trunc i64 %tmp7 to i32
+  store i32 %tmp8, i32* undef, align 4
+  store double %tmp5, double* undef, align 16
+  ret void
+}

