diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2018-11-25 16:54:43 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2018-11-25 16:54:43 +0000 |
| commit | 2e5a25c1708260cfbadb4e799ee86202b0105f74 (patch) | |
| tree | 758056859af173e7c68504e66a74e276078350c9 | |
| parent | 6615a7132ab88d6c16a220322b22be4280b001f9 (diff) | |
| download | bcm5719-llvm-2e5a25c1708260cfbadb4e799ee86202b0105f74.tar.gz bcm5719-llvm-2e5a25c1708260cfbadb4e799ee86202b0105f74.zip | |
[x86] add tests for select-of-fp-constants; NFC
There are many options here depending on subtarget,
but we are uniformly relying on a transform that was
driven by performance for a 32-bit SSE2 target in 2009.
Note: The same motivation was apparently used to do this
transform for *all* targets, so non-x86 may want to look
at this too.
llvm-svn: 347525
| -rw-r--r-- | llvm/test/CodeGen/X86/2009-03-07-FPConstSelect.ll | 18 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/select-of-fp-constants.ll | 96 |
2 files changed, 96 insertions, 18 deletions
diff --git a/llvm/test/CodeGen/X86/2009-03-07-FPConstSelect.ll b/llvm/test/CodeGen/X86/2009-03-07-FPConstSelect.ll deleted file mode 100644 index d38e8b2d638..00000000000 --- a/llvm/test/CodeGen/X86/2009-03-07-FPConstSelect.ll +++ /dev/null @@ -1,18 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-- | FileCheck %s - -; This should do a single load into the fp stack for the return, not diddle with xmm registers. - -define float @f(i32 %x) nounwind readnone { -; CHECK-LABEL: f: -; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: sete %al -; CHECK-NEXT: flds {{\.LCPI.*}}(,%eax,4) -; CHECK-NEXT: retl - %c = icmp eq i32 %x, 0 - %r = select i1 %c, float 42.0, float 23.0 - ret float %r -} - diff --git a/llvm/test/CodeGen/X86/select-of-fp-constants.ll b/llvm/test/CodeGen/X86/select-of-fp-constants.ll new file mode 100644 index 00000000000..84b0bc13441 --- /dev/null +++ b/llvm/test/CodeGen/X86/select-of-fp-constants.ll @@ -0,0 +1,96 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-- -mattr=sse2 | FileCheck %s --check-prefixes=X32,X32_SSE,X32_SSE2 +; RUN: llc < %s -mtriple=i386-- -mattr=sse4.1 | FileCheck %s --check-prefixes=X32,X32_SSE,X32_SSE4 +; RUN: llc < %s -mtriple=i386-- -mattr=avx2 | FileCheck %s --check-prefixes=X32,X32_AVX,X32_AVX2 +; RUN: llc < %s -mtriple=i386-- -mattr=avx512f | FileCheck %s --check-prefixes=X32,X32_AVX,X32_AVX512F +; RUN: llc < %s -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=X64,X64_SSE,X64_SSE2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=sse4.1 | FileCheck %s --check-prefixes=X64,X64_SSE,X64_SSE4 +; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=X64,X64_AVX,X64_AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64_AVX,X64_AVX512F + +; This should do a single load into the fp stack for the return, not diddle with xmm registers. + +define float @icmp_select_fp_constants(i32 %x) nounwind readnone { +; X32-LABEL: icmp_select_fp_constants: +; X32: # %bb.0: +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; X32-NEXT: sete %al +; X32-NEXT: flds {{\.LCPI.*}}(,%eax,4) +; X32-NEXT: retl +; +; X64_SSE-LABEL: icmp_select_fp_constants: +; X64_SSE: # %bb.0: +; X64_SSE-NEXT: xorl %eax, %eax +; X64_SSE-NEXT: testl %edi, %edi +; X64_SSE-NEXT: sete %al +; X64_SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64_SSE-NEXT: retq +; +; X64_AVX-LABEL: icmp_select_fp_constants: +; X64_AVX: # %bb.0: +; X64_AVX-NEXT: xorl %eax, %eax +; X64_AVX-NEXT: testl %edi, %edi +; X64_AVX-NEXT: sete %al +; X64_AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64_AVX-NEXT: retq + %c = icmp eq i32 %x, 0 + %r = select i1 %c, float 42.0, float 23.0 + ret float %r +} + +define float @fcmp_select_fp_constants(float %x) nounwind readnone { +; X32_SSE-LABEL: fcmp_select_fp_constants: +; X32_SSE: # %bb.0: +; X32_SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32_SSE-NEXT: cmpneqss {{[0-9]+}}(%esp), %xmm0 +; X32_SSE-NEXT: movd %xmm0, %eax +; X32_SSE-NEXT: andl $1, %eax +; X32_SSE-NEXT: flds {{\.LCPI.*}}(,%eax,4) +; X32_SSE-NEXT: retl +; +; X32_AVX2-LABEL: fcmp_select_fp_constants: +; X32_AVX2: # %bb.0: +; X32_AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32_AVX2-NEXT: vcmpneqss {{[0-9]+}}(%esp), %xmm0, %xmm0 +; X32_AVX2-NEXT: vmovd %xmm0, %eax +; X32_AVX2-NEXT: andl $1, %eax +; X32_AVX2-NEXT: flds {{\.LCPI.*}}(,%eax,4) +; X32_AVX2-NEXT: retl +; +; X32_AVX512F-LABEL: fcmp_select_fp_constants: +; X32_AVX512F: # %bb.0: +; X32_AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32_AVX512F-NEXT: vcmpneqss {{[0-9]+}}(%esp), %xmm0, %k0 +; X32_AVX512F-NEXT: kmovw %k0, %eax +; X32_AVX512F-NEXT: flds {{\.LCPI.*}}(,%eax,4) +; X32_AVX512F-NEXT: retl +; +; X64_SSE-LABEL: fcmp_select_fp_constants: +; X64_SSE: # %bb.0: +; X64_SSE-NEXT: cmpneqss {{.*}}(%rip), %xmm0 +; X64_SSE-NEXT: movd %xmm0, %eax +; X64_SSE-NEXT: andl $1, %eax +; X64_SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64_SSE-NEXT: retq +; +; X64_AVX2-LABEL: fcmp_select_fp_constants: +; X64_AVX2: # %bb.0: +; X64_AVX2-NEXT: vcmpneqss {{.*}}(%rip), %xmm0, %xmm0 +; X64_AVX2-NEXT: vmovd %xmm0, %eax +; X64_AVX2-NEXT: andl $1, %eax +; X64_AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64_AVX2-NEXT: retq +; +; X64_AVX512F-LABEL: fcmp_select_fp_constants: +; X64_AVX512F: # %bb.0: +; X64_AVX512F-NEXT: vcmpneqss {{.*}}(%rip), %xmm0, %k0 +; X64_AVX512F-NEXT: kmovw %k0, %eax +; X64_AVX512F-NEXT: movzwl %ax, %eax +; X64_AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64_AVX512F-NEXT: retq + %c = fcmp une float %x, -4.0 + %r = select i1 %c, float 42.0, float 23.0 + ret float %r +} + |

