diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-10-22 22:14:05 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-10-22 22:14:05 +0000 |
| commit | c8e183f9ee365e14eda594ec6428cef0ad0f7be7 (patch) | |
| tree | b234c91521556727f625083ca47e3ebb0e6ae596 /llvm/test/CodeGen | |
| parent | 21a62e23d8730a356b22a10a3a364ea3a10d9ac2 (diff) | |
| download | bcm5719-llvm-c8e183f9ee365e14eda594ec6428cef0ad0f7be7.tar.gz bcm5719-llvm-c8e183f9ee365e14eda594ec6428cef0ad0f7be7.zip | |
Recommit r344877 "[X86] Stop promoting integer loads to vXi64"
I've included a fix to DAGCombiner::ForwardStoreValueToDirectLoad that I believe will prevent the previous miscompile.
Original commit message:
Theoretically this was done to simplify the amount of isel patterns that were needed. But it also meant a substantial number of our isel patterns have to match an explicit bitcast. By making the vXi32/vXi16/vXi8 types legal for loads, DAG combiner should be able to change the load type to rem
I had to add some additional plain load instruction patterns and a few other special cases, but overall the isel table has reduced in size by ~12000 bytes. So it looks like this promotion was hurting us more than helping.
I still have one crash in vector-trunc.ll that I'm hoping @RKSimon can help with. It seems to relate to using getTargetConstantFromNode on a load that was shrunk due to an extract_subvector combine after the constant pool entry was created. So we end up decoding more mask elements than the lo
I'm hoping this patch will simplify the number of patterns needed to remove the and/or/xor promotion.
Reviewers: RKSimon, spatel
Reviewed By: RKSimon
Subscribers: llvm-commits, RKSimon
Differential Revision: https://reviews.llvm.org/D53306
llvm-svn: 344965
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-vperm2x128.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/oddshuffles.ll | 24 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/pshufb-mask-comments.ll | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-extend-inreg.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-idiv-v2i32.ll | 18 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/widened-broadcast.ll | 95 |
6 files changed, 79 insertions, 68 deletions
diff --git a/llvm/test/CodeGen/X86/avx-vperm2x128.ll b/llvm/test/CodeGen/X86/avx-vperm2x128.ll index 75a11845b1e..0c501ea6895 100644 --- a/llvm/test/CodeGen/X86/avx-vperm2x128.ll +++ b/llvm/test/CodeGen/X86/avx-vperm2x128.ll @@ -224,7 +224,7 @@ entry: define <16 x i16> @shuffle_v16i16_4501_mem(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ssp { ; AVX1-LABEL: shuffle_v16i16_4501_mem: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovdqa (%rdi), %ymm0 +; AVX1-NEXT: vmovdqa (%rdi), %xmm0 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = mem[0,1],ymm0[0,1] diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll index 6affef33932..9216cad5882 100644 --- a/llvm/test/CodeGen/X86/oddshuffles.ll +++ b/llvm/test/CodeGen/X86/oddshuffles.ll @@ -1630,7 +1630,7 @@ define void @interleave_24i32_in(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2, ; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm5 = ymm1[1,2,3,3,5,6,7,7] ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm5 = ymm5[2,2,2,3] ; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2,3],ymm4[4],ymm5[5,6],ymm4[7] -; AVX2-SLOW-NEXT: vbroadcastsd 24(%rsi), %ymm5 +; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm5 = ymm0[0,3,3,3] ; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm5[2],ymm4[3,4],ymm5[5],ymm4[6,7] ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,2,2] ; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[1,1,2,2] @@ -1654,19 +1654,19 @@ define void @interleave_24i32_in(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2, ; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2,3],ymm3[4],ymm4[5,6],ymm3[7] ; AVX2-FAST-NEXT: vbroadcastsd %xmm2, %ymm4 ; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1],ymm4[2],ymm3[3,4],ymm4[5],ymm3[6,7] -; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,2,2] -; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm4 = ymm2[1,1,2,2] -; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0],ymm0[1],ymm4[2,3],ymm0[4],ymm4[5,6],ymm0[7] -; AVX2-FAST-NEXT: vpermilps {{.*#+}} ymm4 = ymm1[0,0,3,3,4,4,7,7] -; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm4[2],ymm0[3,4],ymm4[5],ymm0[6,7] -; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm4 = [5,6,5,6,5,6,7,7] -; AVX2-FAST-NEXT: vpermps %ymm1, %ymm4, %ymm1 +; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm4 = ymm0[1,1,2,2] +; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm5 = ymm2[1,1,2,2] +; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2,3],ymm4[4],ymm5[5,6],ymm4[7] +; AVX2-FAST-NEXT: vpermilps {{.*#+}} ymm5 = ymm1[0,0,3,3,4,4,7,7] +; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm5[2],ymm4[3,4],ymm5[5],ymm4[6,7] +; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm5 = [5,6,5,6,5,6,7,7] +; AVX2-FAST-NEXT: vpermps %ymm1, %ymm5, %ymm1 ; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[2,1,3,3] ; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3],ymm2[4],ymm1[5,6],ymm2[7] -; AVX2-FAST-NEXT: vbroadcastsd 24(%rsi), %ymm2 -; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4],ymm2[5],ymm1[6,7] -; AVX2-FAST-NEXT: vmovups %ymm1, 64(%rdi) -; AVX2-FAST-NEXT: vmovups %ymm0, 32(%rdi) +; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,3,3] +; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3,4],ymm0[5],ymm1[6,7] +; AVX2-FAST-NEXT: vmovups %ymm0, 64(%rdi) +; AVX2-FAST-NEXT: vmovups %ymm4, 32(%rdi) ; AVX2-FAST-NEXT: vmovups %ymm3, (%rdi) ; AVX2-FAST-NEXT: vzeroupper ; AVX2-FAST-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pshufb-mask-comments.ll b/llvm/test/CodeGen/X86/pshufb-mask-comments.ll index 0900fdccb49..d0ed99f92f3 100644 --- a/llvm/test/CodeGen/X86/pshufb-mask-comments.ll +++ b/llvm/test/CodeGen/X86/pshufb-mask-comments.ll @@ -57,9 +57,9 @@ define <16 x i8> @test5(<16 x i8> %V) { ; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: movq %rax, %xmm1 ; CHECK-NEXT: movdqa %xmm1, (%rax) -; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [1,1] -; CHECK-NEXT: movdqa %xmm1, (%rax) -; CHECK-NEXT: pshufb %xmm1, %xmm0 +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1,1] +; CHECK-NEXT: movaps %xmm1, (%rax) +; CHECK-NEXT: pshufb (%rax), %xmm0 ; CHECK-NEXT: retq store <2 x i64> <i64 1, i64 0>, <2 x i64>* undef, align 16 %l = load <2 x i64>, <2 x i64>* undef, align 16 diff --git a/llvm/test/CodeGen/X86/vector-extend-inreg.ll b/llvm/test/CodeGen/X86/vector-extend-inreg.ll index 86bb13f57eb..d790cb54b61 100644 --- a/llvm/test/CodeGen/X86/vector-extend-inreg.ll +++ b/llvm/test/CodeGen/X86/vector-extend-inreg.ll @@ -13,6 +13,7 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun ; X32-SSE-NEXT: subl $384, %esp # imm = 0x180 ; X32-SSE-NEXT: movl 88(%ebp), %ecx ; X32-SSE-NEXT: movdqa 72(%ebp), %xmm0 +; X32-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero ; X32-SSE-NEXT: xorps %xmm1, %xmm1 ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) @@ -21,7 +22,6 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) -; X32-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero ; X32-SSE-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) ; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) diff --git a/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll b/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll index 49e29ac17a5..00126d67532 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll @@ -693,20 +693,20 @@ define void @test_sdiv_pow2_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind { ; X86-NEXT: movdqa %xmm0, %xmm1 ; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3] -; X86-NEXT: movdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648] -; X86-NEXT: movdqa {{.*#+}} xmm3 = [31,0,31,0] -; X86-NEXT: movdqa %xmm2, %xmm4 -; X86-NEXT: psrlq %xmm3, %xmm4 +; X86-NEXT: movdqa {{.*#+}} xmm2 = [31,0,31,0] +; X86-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648] +; X86-NEXT: movdqa %xmm3, %xmm4 +; X86-NEXT: psrlq %xmm2, %xmm4 ; X86-NEXT: movl $31, %ecx ; X86-NEXT: movd %ecx, %xmm5 -; X86-NEXT: psrlq %xmm5, %xmm2 -; X86-NEXT: movsd {{.*#+}} xmm2 = xmm4[0],xmm2[1] +; X86-NEXT: psrlq %xmm5, %xmm3 +; X86-NEXT: movsd {{.*#+}} xmm3 = xmm4[0],xmm3[1] ; X86-NEXT: movdqa %xmm1, %xmm4 -; X86-NEXT: psrlq %xmm3, %xmm4 +; X86-NEXT: psrlq %xmm2, %xmm4 ; X86-NEXT: psrlq %xmm5, %xmm1 ; X86-NEXT: movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1] -; X86-NEXT: xorpd %xmm2, %xmm1 -; X86-NEXT: psubq %xmm2, %xmm1 +; X86-NEXT: xorpd %xmm3, %xmm1 +; X86-NEXT: psubq %xmm3, %xmm1 ; X86-NEXT: pand {{\.LCPI.*}}, %xmm1 ; X86-NEXT: psrlq $29, %xmm1 ; X86-NEXT: paddq %xmm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/widened-broadcast.ll b/llvm/test/CodeGen/X86/widened-broadcast.ll index ce99d22dbbd..167128ae002 100644 --- a/llvm/test/CodeGen/X86/widened-broadcast.ll +++ b/llvm/test/CodeGen/X86/widened-broadcast.ll @@ -121,10 +121,21 @@ define <8 x i32> @load_splat_8i32_4i32_01010101(<4 x i32>* %ptr) nounwind uwtabl ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: retq ; -; AVX-LABEL: load_splat_8i32_4i32_01010101: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: load_splat_8i32_4i32_01010101: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: load_splat_8i32_4i32_01010101: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_splat_8i32_4i32_01010101: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX512-NEXT: retq entry: %ld = load <4 x i32>, <4 x i32>* %ptr %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -138,21 +149,10 @@ define <8 x i32> @load_splat_8i32_8i32_01010101(<8 x i32>* %ptr) nounwind uwtabl ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: retq ; -; AVX1-LABEL: load_splat_8i32_8i32_01010101: -; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: load_splat_8i32_8i32_01010101: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: load_splat_8i32_8i32_01010101: -; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: load_splat_8i32_8i32_01010101: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX-NEXT: retq entry: %ld = load <8 x i32>, <8 x i32>* %ptr %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -246,10 +246,21 @@ define <16 x i16> @load_splat_16i16_8i16_0123012301230123(<8 x i16>* %ptr) nounw ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: retq ; -; AVX-LABEL: load_splat_16i16_8i16_0123012301230123: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: load_splat_16i16_8i16_0123012301230123: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: load_splat_16i16_8i16_0123012301230123: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_splat_16i16_8i16_0123012301230123: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX512-NEXT: retq entry: %ld = load <8 x i16>, <8 x i16>* %ptr %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3,i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> @@ -263,21 +274,10 @@ define <16 x i16> @load_splat_16i16_16i16_0101010101010101(<16 x i16>* %ptr) nou ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: retq ; -; AVX1-LABEL: load_splat_16i16_16i16_0101010101010101: -; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: load_splat_16i16_16i16_0101010101010101: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vbroadcastss (%rdi), %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: load_splat_16i16_16i16_0101010101010101: -; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vbroadcastss (%rdi), %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: load_splat_16i16_16i16_0101010101010101: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vbroadcastss (%rdi), %ymm0 +; AVX-NEXT: retq entry: %ld = load <16 x i16>, <16 x i16>* %ptr %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -446,10 +446,21 @@ define <32 x i8> @load_splat_32i8_16i8_01234567012345670123456701234567(<16 x i8 ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: retq ; -; AVX-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX512-NEXT: retq entry: %ld = load <16 x i8>, <16 x i8>* %ptr %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |

