diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-10-22 16:59:24 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-10-22 16:59:24 +0000 |
| commit | 8d8dcfe690e64608f89af071038851f1c4925ee4 (patch) | |
| tree | 1ecdb3e5dcf10c2ff4923ca303bf0171d0da67ef /llvm/test/CodeGen | |
| parent | ba88ad35ecc38011066084c5ca76d4793c5eb89b (diff) | |
| download | bcm5719-llvm-8d8dcfe690e64608f89af071038851f1c4925ee4.tar.gz bcm5719-llvm-8d8dcfe690e64608f89af071038851f1c4925ee4.zip | |
Revert r344877 "[X86] Stop promoting integer loads to vXi64"
Sam McCall reported miscompiles in some TensorFlow code. Reverting while I try to figure out the cause.
llvm-svn: 344921
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-vperm2x128.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/oddshuffles.ll | 24 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/pshufb-mask-comments.ll | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-extend-inreg.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-idiv-v2i32.ll | 18 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/widened-broadcast.ll | 95 |
6 files changed, 68 insertions, 79 deletions
diff --git a/llvm/test/CodeGen/X86/avx-vperm2x128.ll b/llvm/test/CodeGen/X86/avx-vperm2x128.ll index 0c501ea6895..75a11845b1e 100644 --- a/llvm/test/CodeGen/X86/avx-vperm2x128.ll +++ b/llvm/test/CodeGen/X86/avx-vperm2x128.ll @@ -224,7 +224,7 @@ entry: define <16 x i16> @shuffle_v16i16_4501_mem(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ssp { ; AVX1-LABEL: shuffle_v16i16_4501_mem: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovdqa (%rdi), %ymm0 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = mem[0,1],ymm0[0,1] diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll index 9216cad5882..6affef33932 100644 --- a/llvm/test/CodeGen/X86/oddshuffles.ll +++ b/llvm/test/CodeGen/X86/oddshuffles.ll @@ -1630,7 +1630,7 @@ define void @interleave_24i32_in(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2, ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm5 = ymm1[1,2,3,3,5,6,7,7] ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm5 = ymm5[2,2,2,3] ; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2,3],ymm4[4],ymm5[5,6],ymm4[7] -; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm5 = ymm0[0,3,3,3] +; AVX2-SLOW-NEXT: vbroadcastsd 24(%rsi), %ymm5 ; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm5[2],ymm4[3,4],ymm5[5],ymm4[6,7] ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,2,2] ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[1,1,2,2] @@ -1654,19 +1654,19 @@ define void @interleave_24i32_in(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2, ; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2,3],ymm3[4],ymm4[5,6],ymm3[7] ; AVX2-FAST-NEXT: vbroadcastsd %xmm2, %ymm4 ; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1],ymm4[2],ymm3[3,4],ymm4[5],ymm3[6,7] -; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm4 = ymm0[1,1,2,2] -; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm5 = ymm2[1,1,2,2] -; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = 
ymm5[0],ymm4[1],ymm5[2,3],ymm4[4],ymm5[5,6],ymm4[7] -; AVX2-FAST-NEXT: vpermilps {{.*#+}} ymm5 = ymm1[0,0,3,3,4,4,7,7] -; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm5[2],ymm4[3,4],ymm5[5],ymm4[6,7] -; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm5 = [5,6,5,6,5,6,7,7] -; AVX2-FAST-NEXT: vpermps %ymm1, %ymm5, %ymm1 +; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,2,2] +; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm4 = ymm2[1,1,2,2] +; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0],ymm0[1],ymm4[2,3],ymm0[4],ymm4[5,6],ymm0[7] +; AVX2-FAST-NEXT: vpermilps {{.*#+}} ymm4 = ymm1[0,0,3,3,4,4,7,7] +; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm4[2],ymm0[3,4],ymm4[5],ymm0[6,7] +; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm4 = [5,6,5,6,5,6,7,7] +; AVX2-FAST-NEXT: vpermps %ymm1, %ymm4, %ymm1 ; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[2,1,3,3] ; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3],ymm2[4],ymm1[5,6],ymm2[7] -; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,3,3] -; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3,4],ymm0[5],ymm1[6,7] -; AVX2-FAST-NEXT: vmovups %ymm0, 64(%rdi) -; AVX2-FAST-NEXT: vmovups %ymm4, 32(%rdi) +; AVX2-FAST-NEXT: vbroadcastsd 24(%rsi), %ymm2 +; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4],ymm2[5],ymm1[6,7] +; AVX2-FAST-NEXT: vmovups %ymm1, 64(%rdi) +; AVX2-FAST-NEXT: vmovups %ymm0, 32(%rdi) ; AVX2-FAST-NEXT: vmovups %ymm3, (%rdi) ; AVX2-FAST-NEXT: vzeroupper ; AVX2-FAST-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pshufb-mask-comments.ll b/llvm/test/CodeGen/X86/pshufb-mask-comments.ll index d0ed99f92f3..0900fdccb49 100644 --- a/llvm/test/CodeGen/X86/pshufb-mask-comments.ll +++ b/llvm/test/CodeGen/X86/pshufb-mask-comments.ll @@ -57,9 +57,9 @@ define <16 x i8> @test5(<16 x i8> %V) { ; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: movq %rax, %xmm1 ; CHECK-NEXT: movdqa %xmm1, (%rax) -; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1,1] -; CHECK-NEXT: movaps %xmm1, (%rax) -; CHECK-NEXT: pshufb 
(%rax), %xmm0 +; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [1,1] +; CHECK-NEXT: movdqa %xmm1, (%rax) +; CHECK-NEXT: pshufb %xmm1, %xmm0 ; CHECK-NEXT: retq store <2 x i64> <i64 1, i64 0>, <2 x i64>* undef, align 16 %l = load <2 x i64>, <2 x i64>* undef, align 16 diff --git a/llvm/test/CodeGen/X86/vector-extend-inreg.ll b/llvm/test/CodeGen/X86/vector-extend-inreg.ll index d790cb54b61..86bb13f57eb 100644 --- a/llvm/test/CodeGen/X86/vector-extend-inreg.ll +++ b/llvm/test/CodeGen/X86/vector-extend-inreg.ll @@ -13,7 +13,6 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun ; X32-SSE-NEXT: subl $384, %esp # imm = 0x180 ; X32-SSE-NEXT: movl 88(%ebp), %ecx ; X32-SSE-NEXT: movdqa 72(%ebp), %xmm0 -; X32-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero ; X32-SSE-NEXT: xorps %xmm1, %xmm1 ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) @@ -22,6 +21,7 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) +; X32-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero ; X32-SSE-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) diff --git a/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll b/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll index 00126d67532..49e29ac17a5 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll @@ -693,20 +693,20 @@ define void @test_sdiv_pow2_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind { ; X86-NEXT: movdqa %xmm0, %xmm1 ; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3] -; X86-NEXT: movdqa {{.*#+}} xmm2 = [31,0,31,0] -; X86-NEXT: movdqa 
{{.*#+}} xmm3 = [0,2147483648,0,2147483648] -; X86-NEXT: movdqa %xmm3, %xmm4 -; X86-NEXT: psrlq %xmm2, %xmm4 +; X86-NEXT: movdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648] +; X86-NEXT: movdqa {{.*#+}} xmm3 = [31,0,31,0] +; X86-NEXT: movdqa %xmm2, %xmm4 +; X86-NEXT: psrlq %xmm3, %xmm4 ; X86-NEXT: movl $31, %ecx ; X86-NEXT: movd %ecx, %xmm5 -; X86-NEXT: psrlq %xmm5, %xmm3 -; X86-NEXT: movsd {{.*#+}} xmm3 = xmm4[0],xmm3[1] +; X86-NEXT: psrlq %xmm5, %xmm2 +; X86-NEXT: movsd {{.*#+}} xmm2 = xmm4[0],xmm2[1] ; X86-NEXT: movdqa %xmm1, %xmm4 -; X86-NEXT: psrlq %xmm2, %xmm4 +; X86-NEXT: psrlq %xmm3, %xmm4 ; X86-NEXT: psrlq %xmm5, %xmm1 ; X86-NEXT: movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1] -; X86-NEXT: xorpd %xmm3, %xmm1 -; X86-NEXT: psubq %xmm3, %xmm1 +; X86-NEXT: xorpd %xmm2, %xmm1 +; X86-NEXT: psubq %xmm2, %xmm1 ; X86-NEXT: pand {{\.LCPI.*}}, %xmm1 ; X86-NEXT: psrlq $29, %xmm1 ; X86-NEXT: paddq %xmm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/widened-broadcast.ll b/llvm/test/CodeGen/X86/widened-broadcast.ll index 167128ae002..ce99d22dbbd 100644 --- a/llvm/test/CodeGen/X86/widened-broadcast.ll +++ b/llvm/test/CodeGen/X86/widened-broadcast.ll @@ -121,21 +121,10 @@ define <8 x i32> @load_splat_8i32_4i32_01010101(<4 x i32>* %ptr) nounwind uwtabl ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: retq ; -; AVX1-LABEL: load_splat_8i32_4i32_01010101: -; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: load_splat_8i32_4i32_01010101: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: load_splat_8i32_4i32_01010101: -; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: load_splat_8i32_4i32_01010101: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX-NEXT: retq entry: %ld = load <4 x i32>, <4 x i32>* %ptr %ret = shufflevector <4 x i32> %ld, <4 x i32> 
undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -149,10 +138,21 @@ define <8 x i32> @load_splat_8i32_8i32_01010101(<8 x i32>* %ptr) nounwind uwtabl ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: retq ; -; AVX-LABEL: load_splat_8i32_8i32_01010101: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: load_splat_8i32_8i32_01010101: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: load_splat_8i32_8i32_01010101: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_splat_8i32_8i32_01010101: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX512-NEXT: retq entry: %ld = load <8 x i32>, <8 x i32>* %ptr %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -246,21 +246,10 @@ define <16 x i16> @load_splat_16i16_8i16_0123012301230123(<8 x i16>* %ptr) nounw ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: retq ; -; AVX1-LABEL: load_splat_16i16_8i16_0123012301230123: -; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: load_splat_16i16_8i16_0123012301230123: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: load_splat_16i16_8i16_0123012301230123: -; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: load_splat_16i16_8i16_0123012301230123: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX-NEXT: retq entry: %ld = load <8 x i16>, <8 x i16>* %ptr %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3,i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, 
i32 3> @@ -274,10 +263,21 @@ define <16 x i16> @load_splat_16i16_16i16_0101010101010101(<16 x i16>* %ptr) nou ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: retq ; -; AVX-LABEL: load_splat_16i16_16i16_0101010101010101: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vbroadcastss (%rdi), %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: load_splat_16i16_16i16_0101010101010101: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,0,0] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: load_splat_16i16_16i16_0101010101010101: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vbroadcastss (%rdi), %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_splat_16i16_16i16_0101010101010101: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vbroadcastss (%rdi), %ymm0 +; AVX512-NEXT: retq entry: %ld = load <16 x i16>, <16 x i16>* %ptr %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -446,21 +446,10 @@ define <32 x i8> @load_splat_32i8_16i8_01234567012345670123456701234567(<16 x i8 ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: retq ; -; AVX1-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567: -; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567: -; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX-NEXT: retq entry: %ld = load <16 x i8>, <16 x i8>* %ptr %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 
0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |

