| author | Chandler Carruth <chandlerc@gmail.com> | 2017-06-06 02:15:31 +0000 |
|---|---|---|
| committer | Chandler Carruth <chandlerc@gmail.com> | 2017-06-06 02:15:31 +0000 |
| commit | 41ed4034ddeb3337e6f7cfecdf30983b3a231f5a | |
| tree | 60514f405696880bd29db0ce50711fe3cb6ae32b | |
| parent | 462f36d36d4593e30a1c765cd3c28a0c66633642 | |
[x86] Revert the X86FoldTablesEmitter due to more miscompiles.
In testing, we've found yet another miscompile caused by the new tables.
And it is even less clear how to fix this one: we could teach the emitter
to fold a 16-bit load instead of the 32-bit load it wants, or block the
folding entirely.
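
To make the width concern concrete, here is a hypothetical sketch (mine, not taken from the failing test; the offset and registers are purely illustrative) of why substituting a wider load is not a behavior-preserving rewrite:

```asm
# Assume the spilled value is a 16-bit quantity stored at 8(%rsp).
movzwl  8(%rsp), %eax    # 16-bit load, zero-extended: reads exactly the value
movl    8(%rsp), %eax    # 32-bit load: also reads 2 bytes nothing ever wrote,
                         # so bits 16-31 of %eax hold stale stack data
```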
Also, the approach of excluding problematic instructions seems increasingly
unlikely to scale well.
I have left a more detailed analysis in the review log for the original
patch (https://reviews.llvm.org/D32684), along with a suggested path
forward. I will land the additional test case I wrote, covering the code
that was miscompiling (folding into the output of `pextrw`), in a
subsequent commit to keep this a pure revert.
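
As a hedged reconstruction of that failure mode (based on the description above, not on the actual miscompiling test): `pextrw` with a register destination zero-extends a 16-bit element into a 32-bit GPR, but its memory form (SSE4.1) stores only 16 bits, so folding a 32-bit spill store into it leaves half of the slot undefined:

```asm
# Unfolded: the full 32-bit spill slot is well defined.
pextrw  $2, %xmm0, %eax        # %eax = zero-extended word 2 of %xmm0
movl    %eax, 8(%rsp)          # the spill writes all 4 bytes
movl    8(%rsp), %ecx          # later reload: bits 16-31 are known zero

# Folded: pextrw's memory form writes only 2 bytes.
pextrw  $2, %xmm0, 8(%rsp)     # bytes 2-3 of the slot are never written
movl    8(%rsp), %ecx          # reload reads 4 bytes: bits 16-31 are stale
```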
For each commit reverted here, I've restricted the revert to the non-test
code touching the x86 fold-table emission, except for the last commit,
where I did revert the test updates as well. This means the *new* test
cases added for `insertps` and `xchg` remain untouched (and continue to pass).
Reverted commits:
r304540: [X86] Don't fold into memory operands into insertps in the ...
r304347: [TableGen] Adapt more places to getValueAsString now ...
r304163: [X86] Don't fold away the memory operand of an xchg.
r304123: Don't capture a temporary std::string in a StringRef.
r304122: Resubmit "[X86] Adding new LLVM TableGen backend that ..."
The original commit was r304088; after a string of fixes it was reverted
in r304121 to fix build bots, then re-landed in r304122.
llvm-svn: 304762
Diffstat (limited to 'llvm/test')

| file | lines changed |
|---|---|
| llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll | 6 |
| llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll | 21 |
| llvm/test/CodeGen/X86/vector-sqrt.ll | 18 |

3 files changed, 29 insertions, 16 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
index 19305d0dad6..383ab21bd40 100644
--- a/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
@@ -354,8 +354,9 @@ declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind rea
 define i32 @test_mm_crc32_u8(i32 %a0, i8 %a1) {
 ; X32-LABEL: test_mm_crc32_u8:
 ; X32:       # BB#0:
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    crc32b {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    crc32b %cl, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_crc32_u8:
@@ -371,8 +372,9 @@ declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone
 define i32 @test_mm_crc32_u16(i32 %a0, i16 %a1) {
 ; X32-LABEL: test_mm_crc32_u16:
 ; X32:       # BB#0:
+; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    crc32w {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    crc32w %cx, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_crc32_u16:
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
index f937d484ce0..4165aea8794 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
@@ -1651,9 +1651,26 @@ define <8 x float> @stack_fold_sqrtps_ymm(<8 x float> %a0) {
 }
 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
 
-; TODO stack_fold_sqrtsd
+define double @stack_fold_sqrtsd(double %a0) {
+  ;CHECK-LABEL: stack_fold_sqrtsd
+  ;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call double @llvm.sqrt.f64(double %a0)
+  ret double %2
+}
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
 ; TODO stack_fold_sqrtsd_int
-; TODO stack_fold_sqrtss
+
+define float @stack_fold_sqrtss(float %a0) {
+  ;CHECK-LABEL: stack_fold_sqrtss
+  ;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call float @llvm.sqrt.f32(float %a0)
+  ret float %2
+}
+declare float @llvm.sqrt.f32(float) nounwind readnone
+
 ; TODO stack_fold_sqrtss_int
 
 define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
diff --git a/llvm/test/CodeGen/X86/vector-sqrt.ll b/llvm/test/CodeGen/X86/vector-sqrt.ll
index 13088b7fa5f..c5ac4466b5f 100644
--- a/llvm/test/CodeGen/X86/vector-sqrt.ll
+++ b/llvm/test/CodeGen/X86/vector-sqrt.ll
@@ -5,10 +5,8 @@
 define <2 x double> @sqrtd2(double* nocapture readonly %v) local_unnamed_addr #0 {
 ; CHECK-LABEL: sqrtd2:
 ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vsqrtsd (%rdi), %xmm0, %xmm0
+; CHECK-NEXT:    vsqrtsd 8(%rdi), %xmm1, %xmm1
 ; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT:    retq
 entry:
@@ -29,14 +27,10 @@ declare double @sqrt(double) local_unnamed_addr #1
 define <4 x float> @sqrtf4(float* nocapture readonly %v) local_unnamed_addr #0 {
 ; CHECK-LABEL: sqrtf4:
 ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vsqrtss %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT:    vsqrtss %xmm3, %xmm3, %xmm3
+; CHECK-NEXT:    vsqrtss (%rdi), %xmm0, %xmm0
+; CHECK-NEXT:    vsqrtss 4(%rdi), %xmm1, %xmm1
+; CHECK-NEXT:    vsqrtss 8(%rdi), %xmm2, %xmm2
+; CHECK-NEXT:    vsqrtss 12(%rdi), %xmm3, %xmm3
 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
```
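
For context on the stack-folding tests added above: the inline asm `"nop"` clobbers `xmm1`-`xmm15` and `flags`, so the argument cannot survive in a register across it and must be spilled; the CHECK line then requires that the reload be folded into the arithmetic instruction itself. A minimal sketch of the codegen the `stack_fold_sqrtsd` test expects, assuming a red-zone slot at `-8(%rsp)` (illustrative, not actual compiler output):

```asm
vmovsd  %xmm0, -8(%rsp)          # spill: every other xmm register is
                                 # clobbered, and xmm0 is the asm's output
nop                              # the inline asm block
vsqrtsd -8(%rsp), %xmm0, %xmm0   # reload folded into vsqrtsd: this is the
                                 # "8-byte Folded Reload" the test checks for
retq
```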

