path: root/llvm/test
author    Chandler Carruth <chandlerc@gmail.com>  2017-06-06 02:15:31 +0000
committer Chandler Carruth <chandlerc@gmail.com>  2017-06-06 02:15:31 +0000
commit    41ed4034ddeb3337e6f7cfecdf30983b3a231f5a (patch)
tree      60514f405696880bd29db0ce50711fe3cb6ae32b /llvm/test
parent    462f36d36d4593e30a1c765cd3c28a0c66633642 (diff)
[x86] Revert the X86FoldTablesEmitter due to more miscompiles.
In testing, we've found yet another miscompile caused by the new tables. And this one is even less clear how to fix (we could teach it to fold a 16-bit load instead of the 32-bit load it wants, or block folding entirely).

Also, the approach to excluding instructions increasingly seems not to scale well.

I have left a more detailed analysis on the review log for the original patch (https://reviews.llvm.org/D32684) along with a suggested path forward. I will land an additional test case that I wrote, covering the code that was miscompiling (folding into the output of `pextrw`), in a subsequent commit to keep this a pure revert.

For each commit reverted here, I've restricted the revert to the non-test code touching the x86 fold table emission, until the last commit, where I did revert the test updates. This means the *new* test cases added for `insertps` and `xchg` remain untouched (and continue to pass).

Reverted commits:
r304540: [X86] Don't fold into memory operands into insertps in the ...
r304347: [TableGen] Adapt more places to getValueAsString now ...
r304163: [X86] Don't fold away the memory operand of an xchg.
r304123: Don't capture a temporary std::string in a StringRef.
r304122: Resubmit "[X86] Adding new LLVM TableGen backend that ..."

The original commit was in r304088; after a string of fixes it was reverted in r304121 to fix build bots, and then re-landed in r304122.

llvm-svn: 304762
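As an aside, here is a minimal LLVM IR sketch of the load-width hazard the message describes; the function name and shape are hypothetical for illustration, not taken from this commit. The crc32w form consumes only an i16, so a fold-table entry that pairs the register form with a 32-bit memory form widens the load past the object:

; Hypothetical reduction of the hazard class: the intrinsic reads only the
; i16 at %p. Folding a 32-bit load here would read two bytes past the
; object, a miscompile that can fault when %p points at the last halfword
; of a mapped page.
define i32 @crc32w_fold_width(i32 %crc, i16* %p) {
  %v = load i16, i16* %p, align 2
  %r = call i32 @llvm.x86.sse42.crc32.32.16(i32 %crc, i16 %v)
  ret i32 %r
}
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind readnone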
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll |  6
-rw-r--r-- llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll      | 21
-rw-r--r-- llvm/test/CodeGen/X86/vector-sqrt.ll                | 18
3 files changed, 29 insertions(+), 16 deletions(-)
diff --git a/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
index 19305d0dad6..383ab21bd40 100644
--- a/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
@@ -354,8 +354,9 @@ declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind rea
define i32 @test_mm_crc32_u8(i32 %a0, i8 %a1) {
; X32-LABEL: test_mm_crc32_u8:
; X32: # BB#0:
+; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: crc32b {{[0-9]+}}(%esp), %eax
+; X32-NEXT: crc32b %cl, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_crc32_u8:
@@ -371,8 +372,9 @@ declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone
define i32 @test_mm_crc32_u16(i32 %a0, i16 %a1) {
; X32-LABEL: test_mm_crc32_u16:
; X32: # BB#0:
+; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: crc32w {{[0-9]+}}(%esp), %eax
+; X32-NEXT: crc32w %cx, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_crc32_u16:
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
index f937d484ce0..4165aea8794 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
@@ -1651,9 +1651,26 @@ define <8 x float> @stack_fold_sqrtps_ymm(<8 x float> %a0) {
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
-; TODO stack_fold_sqrtsd
+define double @stack_fold_sqrtsd(double %a0) {
+ ;CHECK-LABEL: stack_fold_sqrtsd
+ ;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call double @llvm.sqrt.f64(double %a0)
+ ret double %2
+}
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
; TODO stack_fold_sqrtsd_int
-; TODO stack_fold_sqrtss
+
+define float @stack_fold_sqrtss(float %a0) {
+ ;CHECK-LABEL: stack_fold_sqrtss
+ ;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call float @llvm.sqrt.f32(float %a0)
+ ret float %2
+}
+declare float @llvm.sqrt.f32(float) nounwind readnone
+
; TODO stack_fold_sqrtss_int
define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
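A note on how these stack-folding tests work (an explanatory aside, not part of the diff): the inline asm claims xmm1 through xmm15 plus the flags, leaving only xmm0 free for the asm result, so the register allocator must spill %a0 to the stack across the asm. The CHECK line then requires that reload to be folded directly into the vsqrtsd/vsqrtss memory operand rather than issued as a separate vmovsd/vmovss. A distilled form of the pattern, for illustration only:

define double @stack_fold_sqrt_shape(double %a0) {
  ; Clobbering xmm1-xmm15 and the flags forces %a0 out of registers and
  ; onto the stack across this asm.
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ; The test's CHECK line requires this sqrt to pick %a0 back up as a
  ; folded "8-byte Folded Reload" memory operand, not via a plain vmovsd.
  %2 = call double @llvm.sqrt.f64(double %a0)
  ret double %2
}
declare double @llvm.sqrt.f64(double) nounwind readnone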
diff --git a/llvm/test/CodeGen/X86/vector-sqrt.ll b/llvm/test/CodeGen/X86/vector-sqrt.ll
index 13088b7fa5f..c5ac4466b5f 100644
--- a/llvm/test/CodeGen/X86/vector-sqrt.ll
+++ b/llvm/test/CodeGen/X86/vector-sqrt.ll
@@ -5,10 +5,8 @@
define <2 x double> @sqrtd2(double* nocapture readonly %v) local_unnamed_addr #0 {
; CHECK-LABEL: sqrtd2:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vsqrtsd (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: vsqrtsd 8(%rdi), %xmm1, %xmm1
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
entry:
@@ -29,14 +27,10 @@ declare double @sqrt(double) local_unnamed_addr #1
define <4 x float> @sqrtf4(float* nocapture readonly %v) local_unnamed_addr #0 {
; CHECK-LABEL: sqrtf4:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vsqrtss %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vsqrtss %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vsqrtss %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vsqrtss (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: vsqrtss 4(%rdi), %xmm1, %xmm1
+; CHECK-NEXT: vsqrtss 8(%rdi), %xmm2, %xmm2
+; CHECK-NEXT: vsqrtss 12(%rdi), %xmm3, %xmm3
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
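For reference, an assumed reconstruction of the sqrtd2 body, which the hunk above elides (value names and attributes are guesses consistent with the visible CHECK lines and the @sqrt declaration): each element is loaded, passed through libm sqrt, and repacked, which is exactly the load-plus-vsqrtsd pair that the fold tables collapse into a single memory-operand vsqrtsd.

; Assumed reconstruction; the actual body is not shown in the hunk above.
define <2 x double> @sqrtd2(double* nocapture readonly %v) {
entry:
  %0 = load double, double* %v, align 8
  %call = tail call double @sqrt(double %0)
  %arrayidx1 = getelementptr inbounds double, double* %v, i64 1
  %1 = load double, double* %arrayidx1, align 8
  %call2 = tail call double @sqrt(double %1)
  %vecinit = insertelement <2 x double> undef, double %call, i32 0
  %vecinit3 = insertelement <2 x double> %vecinit, double %call2, i32 1
  ret <2 x double> %vecinit3
}
declare double @sqrt(double) local_unnamed_addr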