summary refs log tree commit diff stats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
author Craig Topper <craig.topper@intel.com> 2018-10-22 16:59:24 +0000
committer Craig Topper <craig.topper@intel.com> 2018-10-22 16:59:24 +0000
commit 8d8dcfe690e64608f89af071038851f1c4925ee4 (patch)
tree 1ecdb3e5dcf10c2ff4923ca303bf0171d0da67ef /llvm/test/CodeGen
parent ba88ad35ecc38011066084c5ca76d4793c5eb89b (diff)
download bcm5719-llvm-8d8dcfe690e64608f89af071038851f1c4925ee4.tar.gz
bcm5719-llvm-8d8dcfe690e64608f89af071038851f1c4925ee4.zip
Revert r344877 "[X86] Stop promoting integer loads to vXi64"
Sam McCall reported miscompiles in some tensorflow code. Reverting while I try to figure out.

llvm-svn: 344921
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- llvm/test/CodeGen/X86/avx-vperm2x128.ll 2
-rw-r--r-- llvm/test/CodeGen/X86/oddshuffles.ll 24
-rw-r--r-- llvm/test/CodeGen/X86/pshufb-mask-comments.ll 6
-rw-r--r-- llvm/test/CodeGen/X86/vector-extend-inreg.ll 2
-rw-r--r-- llvm/test/CodeGen/X86/vector-idiv-v2i32.ll 18
-rw-r--r-- llvm/test/CodeGen/X86/widened-broadcast.ll 95
6 files changed, 68 insertions, 79 deletions
diff --git a/llvm/test/CodeGen/X86/avx-vperm2x128.ll b/llvm/test/CodeGen/X86/avx-vperm2x128.ll
index 0c501ea6895..75a11845b1e 100644
--- a/llvm/test/CodeGen/X86/avx-vperm2x128.ll
+++ b/llvm/test/CodeGen/X86/avx-vperm2x128.ll
@@ -224,7 +224,7 @@ entry:
define <16 x i16> @shuffle_v16i16_4501_mem(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ssp {
; AVX1-LABEL: shuffle_v16i16_4501_mem:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovdqa (%rdi), %ymm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = mem[0,1],ymm0[0,1]
diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll
index 9216cad5882..6affef33932 100644
--- a/llvm/test/CodeGen/X86/oddshuffles.ll
+++ b/llvm/test/CodeGen/X86/oddshuffles.ll
@@ -1630,7 +1630,7 @@ define void @interleave_24i32_in(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2,
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm5 = ymm1[1,2,3,3,5,6,7,7]
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm5 = ymm5[2,2,2,3]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2,3],ymm4[4],ymm5[5,6],ymm4[7]
-; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm5 = ymm0[0,3,3,3]
+; AVX2-SLOW-NEXT: vbroadcastsd 24(%rsi), %ymm5
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm5[2],ymm4[3,4],ymm5[5],ymm4[6,7]
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,2,2]
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[1,1,2,2]
@@ -1654,19 +1654,19 @@ define void @interleave_24i32_in(<24 x i32>* %p, <8 x i32>* %q1, <8 x i32>* %q2,
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2,3],ymm3[4],ymm4[5,6],ymm3[7]
; AVX2-FAST-NEXT: vbroadcastsd %xmm2, %ymm4
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1],ymm4[2],ymm3[3,4],ymm4[5],ymm3[6,7]
-; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm4 = ymm0[1,1,2,2]
-; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm5 = ymm2[1,1,2,2]
-; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2,3],ymm4[4],ymm5[5,6],ymm4[7]
-; AVX2-FAST-NEXT: vpermilps {{.*#+}} ymm5 = ymm1[0,0,3,3,4,4,7,7]
-; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm5[2],ymm4[3,4],ymm5[5],ymm4[6,7]
-; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm5 = [5,6,5,6,5,6,7,7]
-; AVX2-FAST-NEXT: vpermps %ymm1, %ymm5, %ymm1
+; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,2,2]
+; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm4 = ymm2[1,1,2,2]
+; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0],ymm0[1],ymm4[2,3],ymm0[4],ymm4[5,6],ymm0[7]
+; AVX2-FAST-NEXT: vpermilps {{.*#+}} ymm4 = ymm1[0,0,3,3,4,4,7,7]
+; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm4[2],ymm0[3,4],ymm4[5],ymm0[6,7]
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm4 = [5,6,5,6,5,6,7,7]
+; AVX2-FAST-NEXT: vpermps %ymm1, %ymm4, %ymm1
; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[2,1,3,3]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3],ymm2[4],ymm1[5,6],ymm2[7]
-; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,3,3]
-; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3,4],ymm0[5],ymm1[6,7]
-; AVX2-FAST-NEXT: vmovups %ymm0, 64(%rdi)
-; AVX2-FAST-NEXT: vmovups %ymm4, 32(%rdi)
+; AVX2-FAST-NEXT: vbroadcastsd 24(%rsi), %ymm2
+; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4],ymm2[5],ymm1[6,7]
+; AVX2-FAST-NEXT: vmovups %ymm1, 64(%rdi)
+; AVX2-FAST-NEXT: vmovups %ymm0, 32(%rdi)
; AVX2-FAST-NEXT: vmovups %ymm3, (%rdi)
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/pshufb-mask-comments.ll b/llvm/test/CodeGen/X86/pshufb-mask-comments.ll
index d0ed99f92f3..0900fdccb49 100644
--- a/llvm/test/CodeGen/X86/pshufb-mask-comments.ll
+++ b/llvm/test/CodeGen/X86/pshufb-mask-comments.ll
@@ -57,9 +57,9 @@ define <16 x i8> @test5(<16 x i8> %V) {
; CHECK-NEXT: movl $1, %eax
; CHECK-NEXT: movq %rax, %xmm1
; CHECK-NEXT: movdqa %xmm1, (%rax)
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1,1]
-; CHECK-NEXT: movaps %xmm1, (%rax)
-; CHECK-NEXT: pshufb (%rax), %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [1,1]
+; CHECK-NEXT: movdqa %xmm1, (%rax)
+; CHECK-NEXT: pshufb %xmm1, %xmm0
; CHECK-NEXT: retq
store <2 x i64> <i64 1, i64 0>, <2 x i64>* undef, align 16
%l = load <2 x i64>, <2 x i64>* undef, align 16
diff --git a/llvm/test/CodeGen/X86/vector-extend-inreg.ll b/llvm/test/CodeGen/X86/vector-extend-inreg.ll
index d790cb54b61..86bb13f57eb 100644
--- a/llvm/test/CodeGen/X86/vector-extend-inreg.ll
+++ b/llvm/test/CodeGen/X86/vector-extend-inreg.ll
@@ -13,7 +13,6 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
; X32-SSE-NEXT: subl $384, %esp # imm = 0x180
; X32-SSE-NEXT: movl 88(%ebp), %ecx
; X32-SSE-NEXT: movdqa 72(%ebp), %xmm0
-; X32-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; X32-SSE-NEXT: xorps %xmm1, %xmm1
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
@@ -22,6 +21,7 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; X32-SSE-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
; X32-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
diff --git a/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll b/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll
index 00126d67532..49e29ac17a5 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll
@@ -693,20 +693,20 @@ define void @test_sdiv_pow2_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; X86-NEXT: movdqa {{.*#+}} xmm2 = [31,0,31,0]
-; X86-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
-; X86-NEXT: movdqa %xmm3, %xmm4
-; X86-NEXT: psrlq %xmm2, %xmm4
+; X86-NEXT: movdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
+; X86-NEXT: movdqa {{.*#+}} xmm3 = [31,0,31,0]
+; X86-NEXT: movdqa %xmm2, %xmm4
+; X86-NEXT: psrlq %xmm3, %xmm4
; X86-NEXT: movl $31, %ecx
; X86-NEXT: movd %ecx, %xmm5
-; X86-NEXT: psrlq %xmm5, %xmm3
-; X86-NEXT: movsd {{.*#+}} xmm3 = xmm4[0],xmm3[1]
+; X86-NEXT: psrlq %xmm5, %xmm2
+; X86-NEXT: movsd {{.*#+}} xmm2 = xmm4[0],xmm2[1]
; X86-NEXT: movdqa %xmm1, %xmm4
-; X86-NEXT: psrlq %xmm2, %xmm4
+; X86-NEXT: psrlq %xmm3, %xmm4
; X86-NEXT: psrlq %xmm5, %xmm1
; X86-NEXT: movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1]
-; X86-NEXT: xorpd %xmm3, %xmm1
-; X86-NEXT: psubq %xmm3, %xmm1
+; X86-NEXT: xorpd %xmm2, %xmm1
+; X86-NEXT: psubq %xmm2, %xmm1
; X86-NEXT: pand {{\.LCPI.*}}, %xmm1
; X86-NEXT: psrlq $29, %xmm1
; X86-NEXT: paddq %xmm0, %xmm1
diff --git a/llvm/test/CodeGen/X86/widened-broadcast.ll b/llvm/test/CodeGen/X86/widened-broadcast.ll
index 167128ae002..ce99d22dbbd 100644
--- a/llvm/test/CodeGen/X86/widened-broadcast.ll
+++ b/llvm/test/CodeGen/X86/widened-broadcast.ll
@@ -121,21 +121,10 @@ define <8 x i32> @load_splat_8i32_4i32_01010101(<4 x i32>* %ptr) nounwind uwtabl
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: retq
;
-; AVX1-LABEL: load_splat_8i32_4i32_01010101:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_8i32_4i32_01010101:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_8i32_4i32_01010101:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_8i32_4i32_01010101:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX-NEXT: retq
entry:
%ld = load <4 x i32>, <4 x i32>* %ptr
%ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -149,10 +138,21 @@ define <8 x i32> @load_splat_8i32_8i32_01010101(<8 x i32>* %ptr) nounwind uwtabl
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: retq
;
-; AVX-LABEL: load_splat_8i32_8i32_01010101:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX-NEXT: retq
+; AVX1-LABEL: load_splat_8i32_8i32_01010101:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_splat_8i32_8i32_01010101:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_splat_8i32_8i32_01010101:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX512-NEXT: retq
entry:
%ld = load <8 x i32>, <8 x i32>* %ptr
%ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -246,21 +246,10 @@ define <16 x i16> @load_splat_16i16_8i16_0123012301230123(<8 x i16>* %ptr) nounw
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: retq
;
-; AVX1-LABEL: load_splat_16i16_8i16_0123012301230123:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_16i16_8i16_0123012301230123:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_16i16_8i16_0123012301230123:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_16i16_8i16_0123012301230123:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX-NEXT: retq
entry:
%ld = load <8 x i16>, <8 x i16>* %ptr
%ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3,i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
@@ -274,10 +263,21 @@ define <16 x i16> @load_splat_16i16_16i16_0101010101010101(<16 x i16>* %ptr) nou
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: retq
;
-; AVX-LABEL: load_splat_16i16_16i16_0101010101010101:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vbroadcastss (%rdi), %ymm0
-; AVX-NEXT: retq
+; AVX1-LABEL: load_splat_16i16_16i16_0101010101010101:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_splat_16i16_16i16_0101010101010101:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vbroadcastss (%rdi), %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: load_splat_16i16_16i16_0101010101010101:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vbroadcastss (%rdi), %ymm0
+; AVX512-NEXT: retq
entry:
%ld = load <16 x i16>, <16 x i16>* %ptr
%ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
@@ -446,21 +446,10 @@ define <32 x i8> @load_splat_32i8_16i8_01234567012345670123456701234567(<16 x i8
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: retq
;
-; AVX1-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX-NEXT: retq
entry:
%ld = load <16 x i8>, <16 x i8>* %ptr
%ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
OpenPOWER on IntegriCloud