| author | Craig Topper <craig.topper@intel.com> | 2019-05-13 04:03:35 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2019-05-13 04:03:35 +0000 |
| commit | 61e556d2bdf3fa0a10dbaadd2dd03d01c341bd27 | |
| tree | 1d9a5a6ac4e14c07eee68a7b68b2fe7fed6758f7 /llvm/test | |
| parent | 3e6d69063d71dfa04add3628a93f8ac6acc0c1e0 | |
Recommit r358887 "[TargetLowering][AMDGPU][X86] Improve SimplifyDemandedBits bitcast handling"
I've included a new fix in X86RegisterInfo to prevent PR41619 without
reintroducing r359392. We might be able to improve that in the base class
implementation of shouldRewriteCopySrc somehow. But this hopefully enables
forward progress on SimplifyDemandedBits improvements for now.
Original commit message:
This patch adds support for BigBitWidth -> SmallBitWidth bitcasts, splitting the DemandedBits/Elts accordingly.
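To see what "splitting accordingly" means: when a wide value is bitcast to narrower elements, each demanded narrow element corresponds to a contiguous bit slice of the wide source, so a demanded-elements mask plus a per-element demanded-bits mask can be folded back into a single demanded-bits mask on the source. Below is a minimal standalone sketch of that bookkeeping (plain C++, not LLVM's actual SimplifyDemandedBits interface; the little-endian element order and all names here are illustrative assumptions):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    // One 64-bit source viewed as two 32-bit elements; element i is
    // assumed to occupy bits [i*32, i*32+32) of the source (little endian).
    const unsigned BigBits = 64, SmallBits = 32;
    const unsigned NumElts = BigBits / SmallBits;

    const uint64_t DemandedElts = 0b10; // only element 1 is demanded
    const uint64_t DemandedBits = 0xFF; // and only its low byte

    // Fold the (elements, per-element bits) pair back into one
    // demanded-bits mask over the wide source.
    uint64_t DemandedSrcBits = 0;
    for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts & (1ULL << i))
            DemandedSrcBits |= DemandedBits << (i * SmallBits);

    // Prints 0x000000ff00000000: whatever computes the other source
    // bits is dead and can be simplified away.
    printf("0x%016llx\n", (unsigned long long)DemandedSrcBits);
    return 0;
}
```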
The AMDGPU backend needed an extra (srl (and x, c1 << c2), c2) -> (and (srl x, c2), c1) combine to encourage BFE creation. I investigated putting this in DAGCombine, but it caused a lot of noise on other targets - some improvements, some regressions.
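That combine is a pure bit identity for logical shifts: masking with c1 << c2 before shifting extracts exactly the same field as shifting first and masking with c1, and the resulting srl+and shape is what bitfield-extract (BFE) patterns match. A quick self-contained check, with arbitrary sample constants standing in for c1 and c2:

```cpp
#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
    // Check (srl (and x, c1 << c2), c2) == (and (srl x, c2), c1)
    // for unsigned (logical) shifts; c1 and c2 are sample values.
    const uint32_t c1 = 0xFF; // field mask
    const uint32_t c2 = 8;    // field position
    for (uint32_t x : {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu}) {
        uint32_t maskThenShift = (x & (c1 << c2)) >> c2;
        uint32_t shiftThenMask = (x >> c2) & c1;
        assert(maskThenShift == shiftThenMask);
    }
    return 0;
}
```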
The X86 changes are all definite wins.
llvm-svn: 360552
Diffstat (limited to 'llvm/test')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/X86/bitcast-setcc-256.ll | 16 |
| -rw-r--r-- | llvm/test/CodeGen/X86/bitcast-setcc-512.ll | 8 |
| -rw-r--r-- | llvm/test/CodeGen/X86/bitcast-vector-bool.ll | 24 |
| -rw-r--r-- | llvm/test/CodeGen/X86/dagcombine-cse.ll | 13 |
| -rw-r--r-- | llvm/test/CodeGen/X86/movmsk-cmp.ll | 112 |
| -rw-r--r-- | llvm/test/CodeGen/X86/pr41619.ll | 27 |
6 files changed, 62 insertions(+), 138 deletions(-)
diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-256.ll b/llvm/test/CodeGen/X86/bitcast-setcc-256.ll
index 41635f37528..f9a233a583b 100644
--- a/llvm/test/CodeGen/X86/bitcast-setcc-256.ll
+++ b/llvm/test/CodeGen/X86/bitcast-setcc-256.ll
@@ -448,22 +448,6 @@ define void @bitcast_8i32_store(i8* %p, <8 x i32> %a0) {
 define void @bitcast_4i64_store(i4* %p, <4 x i64> %a0) {
 ; SSE2-SSSE3-LABEL: bitcast_4i64_store:
 ; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
-; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1
-; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm3
-; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm3
-; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm4
-; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
-; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
-; SSE2-SSSE3-NEXT: por %xmm4, %xmm1
-; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0
-; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm3
-; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm3
-; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
-; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
-; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
 ; SSE2-SSSE3-NEXT: movb %al, (%rdi)
diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll
index 3c294345dd5..177be1fd6a6 100644
--- a/llvm/test/CodeGen/X86/bitcast-setcc-512.ll
+++ b/llvm/test/CodeGen/X86/bitcast-setcc-512.ll
@@ -609,15 +609,13 @@ define void @bitcast_8i64_store(i8* %p, <8 x i64> %a0) {
 ;
 ; AVX1-LABEL: bitcast_8i64_store:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: vmovmskps %ymm0, %eax
 ; AVX1-NEXT: movb %al, (%rdi)
diff --git a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
index 0914f2bff9f..adcee2abe33 100644
--- a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
@@ -208,22 +208,6 @@ define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
 define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
 ; SSE2-SSSE3-LABEL: bitcast_v4i64_to_v2i2:
 ; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
-; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1
-; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm3
-; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm3
-; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm4
-; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm4
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
-; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
-; SSE2-SSSE3-NEXT: por %xmm4, %xmm1
-; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0
-; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm3
-; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm3
-; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
-; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
-; SSE2-SSSE3-NEXT: por %xmm2, %xmm0
 ; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
 ; SSE2-SSSE3-NEXT: movl %eax, %ecx
@@ -532,15 +516,13 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
 ;
 ; AVX1-LABEL: bitcast_v8i64_to_v2i4:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: vmovmskps %ymm0, %eax
 ; AVX1-NEXT: movl %eax, %ecx
diff --git a/llvm/test/CodeGen/X86/dagcombine-cse.ll b/llvm/test/CodeGen/X86/dagcombine-cse.ll
index bf1dab35875..a532d87170d 100644
--- a/llvm/test/CodeGen/X86/dagcombine-cse.ll
+++ b/llvm/test/CodeGen/X86/dagcombine-cse.ll
@@ -14,18 +14,11 @@ define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) n
 ;
 ; X64-LABEL: t:
 ; X64: ## %bb.0: ## %entry
-; X64-NEXT: ## kill: def $edx killed $edx def $rdx
-; X64-NEXT: ## kill: def $esi killed $esi def $rsi
 ; X64-NEXT: imull %ecx, %esi
-; X64-NEXT: leal (%rsi,%rdx), %eax
-; X64-NEXT: cltq
+; X64-NEXT: addl %edx, %esi
+; X64-NEXT: movslq %esi, %rax
 ; X64-NEXT: movl (%rdi,%rax), %eax
-; X64-NEXT: leal 4(%rsi,%rdx), %ecx
-; X64-NEXT: movslq %ecx, %rcx
-; X64-NEXT: movzwl (%rdi,%rcx), %ecx
-; X64-NEXT: shlq $32, %rcx
-; X64-NEXT: orq %rax, %rcx
-; X64-NEXT: movq %rcx, %xmm0
+; X64-NEXT: movq %rax, %xmm0
 ; X64-NEXT: movd %xmm0, %eax
 ; X64-NEXT: retq
 entry:
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 2e6123ff014..c8b3488af16 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -929,22 +929,6 @@ define i1 @allzeros_v16i32_sign(<16 x i32> %arg) {
 define i1 @allones_v4i64_sign(<4 x i64> %arg) {
 ; SSE2-LABEL: allones_v4i64_sign:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
-; SSE2-NEXT: pxor %xmm2, %xmm1
-; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
-; SSE2-NEXT: movdqa %xmm2, %xmm4
-; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
-; SSE2-NEXT: pand %xmm3, %xmm1
-; SSE2-NEXT: por %xmm4, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
-; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
-; SSE2-NEXT: pand %xmm3, %xmm0
-; SSE2-NEXT: por %xmm2, %xmm0
 ; SSE2-NEXT: packssdw %xmm1, %xmm0
 ; SSE2-NEXT: movmskps %xmm0, %eax
 ; SSE2-NEXT: cmpb $15, %al
@@ -989,22 +973,6 @@ define i1 @allones_v4i64_sign(<4 x i64> %arg) {
 define i1 @allzeros_v4i64_sign(<4 x i64> %arg) {
 ; SSE2-LABEL: allzeros_v4i64_sign:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
-; SSE2-NEXT: pxor %xmm2, %xmm1
-; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
-; SSE2-NEXT: movdqa %xmm2, %xmm4
-; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
-; SSE2-NEXT: pand %xmm3, %xmm1
-; SSE2-NEXT: por %xmm4, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
-; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
-; SSE2-NEXT: pand %xmm3, %xmm0
-; SSE2-NEXT: por %xmm2, %xmm0
 ; SSE2-NEXT: packssdw %xmm1, %xmm0
 ; SSE2-NEXT: movmskps %xmm0, %eax
 ; SSE2-NEXT: testb %al, %al
@@ -1095,15 +1063,13 @@ define i1 @allones_v8i64_sign(<8 x i64> %arg) {
 ;
 ; AVX1-LABEL: allones_v8i64_sign:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: vmovmskps %ymm0, %eax
 ; AVX1-NEXT: cmpb $-1, %al
@@ -1198,15 +1164,13 @@ define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
 ;
 ; AVX1-LABEL: allzeros_v8i64_sign:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: vmovmskps %ymm0, %eax
 ; AVX1-NEXT: testb %al, %al
@@ -2539,19 +2503,17 @@ define i1 @allones_v8i64_and1(<8 x i64> %arg) {
 ;
 ; AVX1-LABEL: allones_v8i64_and1:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: vmovmskps %ymm0, %eax
 ; AVX1-NEXT: cmpb $-1, %al
@@ -2615,19 +2577,17 @@ define i1 @allzeros_v8i64_and1(<8 x i64> %arg) {
 ;
 ; AVX1-LABEL: allzeros_v8i64_and1:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: vmovmskps %ymm0, %eax
 ; AVX1-NEXT: testb %al, %al
@@ -3962,19 +3922,17 @@ define i1 @allones_v8i64_and4(<8 x i64> %arg) {
 ;
 ; AVX1-LABEL: allones_v8i64_and4:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: vmovmskps %ymm0, %eax
 ; AVX1-NEXT: cmpb $-1, %al
@@ -4038,19 +3996,17 @@ define i1 @allzeros_v8i64_and4(<8 x i64> %arg) {
 ;
 ; AVX1-LABEL: allzeros_v8i64_and4:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
+; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: vmovmskps %ymm0, %eax
 ; AVX1-NEXT: testb %al, %al
@@ -4170,22 +4126,6 @@ define i32 @movmskps(<4 x float> %x) {
 define i32 @movmskpd256(<4 x double> %x) {
 ; SSE2-LABEL: movmskpd256:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
-; SSE2-NEXT: pxor %xmm2, %xmm1
-; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
-; SSE2-NEXT: movdqa %xmm2, %xmm4
-; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,0,2,2]
-; SSE2-NEXT: pand %xmm3, %xmm1
-; SSE2-NEXT: por %xmm4, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
-; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2]
-; SSE2-NEXT: pand %xmm3, %xmm0
-; SSE2-NEXT: por %xmm2, %xmm0
 ; SSE2-NEXT: packssdw %xmm1, %xmm0
 ; SSE2-NEXT: movmskps %xmm0, %eax
 ; SSE2-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/pr41619.ll b/llvm/test/CodeGen/X86/pr41619.ll
new file mode 100644
index 00000000000..7c71f2c1c29
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr41619.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.14.0 -mattr=avx2 | FileCheck %s
+
+define void @foo(double %arg) {
+; CHECK-LABEL: foo:
+; CHECK: ## %bb.0: ## %bb
+; CHECK-NEXT: vmovq %xmm0, %rax
+; CHECK-NEXT: vmovd %eax, %xmm0
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vmovq %xmm0, %rax
+; CHECK-NEXT: movl %eax, (%rax)
+; CHECK-NEXT: vmovlps %xmm1, (%rax)
+; CHECK-NEXT: retq
+bb:
+  %tmp = bitcast double %arg to i64
+  %tmp1 = trunc i64 %tmp to i32
+  %tmp2 = bitcast i32 %tmp1 to float
+  %tmp3 = insertelement <4 x float> zeroinitializer, float %tmp2, i32 2
+  %tmp4 = bitcast <4 x float> %tmp3 to <2 x double>
+  %tmp5 = extractelement <2 x double> %tmp4, i32 0
+  %tmp6 = extractelement <2 x double> %tmp4, i32 1
+  %tmp7 = bitcast double %tmp6 to i64
+  %tmp8 = trunc i64 %tmp7 to i32
+  store i32 %tmp8, i32* undef, align 4
+  store double %tmp5, double* undef, align 16
+  ret void
+}

