[X86] Fix PR30926 - Add patterns for (v)cvtsi2s{s,d}, (v)cvtsd2ss and (v)cvtss2sd

The code emitted by Clang's intrinsics for (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and (v)cvtss2sd is lowered to a code sequence that includes redundant (v)movss/(v)movsd instructions. This patch adds patterns for optimizing these sequences.

Differential revision: https://reviews.llvm.org/D28455

llvm-svn: 291660
author: Elad Cohen <elad2.cohen@intel.com> 2017-01-11 09:11:48 +0000
committer: Elad Cohen <elad2.cohen@intel.com> 2017-01-11 09:11:48 +0000
commit: 0c2601073e5d655817146296ba3f7740cfc97ce8 (patch)
tree: 2e4185c4ee4ff6f6c7c845505beae0d051a8a7ea /llvm/test
parent: 81d0f17055e9f0b8c1d41606fc5bfc1d9e4126ea (diff)
download: bcm5719-llvm-0c2601073e5d655817146296ba3f7740cfc97ce8.tar.gz
bcm5719-llvm-0c2601073e5d655817146296ba3f7740cfc97ce8.zip
4 files changed, 108 insertions, 9 deletions
diff --git a/llvm/test/CodeGen/X86/avx-cvt.ll b/llvm/test/CodeGen/X86/avx-cvt.ll
index c8e806890d0..a7cd8cf2398 100644
--- a/llvm/test/CodeGen/X86/avx-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx-cvt.ll
@@ -62,6 +62,17 @@ define <8 x float> @fptrunc00(<8 x double> %b) nounwind {
   ret <8 x float> %a
 }
 
+define <4 x float> @fptrunc01(<2 x double> %a0, <4 x float> %a1) nounwind {
+; CHECK-LABEL: fptrunc01:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vcvtsd2ss %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %ext = extractelement <2 x double> %a0, i32 0
+  %cvt = fptrunc double %ext to float
+  %res = insertelement <4 x float> %a1, float %cvt, i32 0
+  ret <4 x float> %res
+}
+
 define <4 x double> @fpext00(<4 x float> %b) nounwind {
 ; CHECK-LABEL: fpext00:
 ; CHECK:       # BB#0:
@@ -71,6 +82,17 @@ define <4 x double> @fpext00(<4 x float> %b) nounwind {
   ret <4 x double> %a
 }
 
+define <2 x double> @fpext01(<2 x double> %a0, <4 x float> %a1) nounwind {
+; CHECK-LABEL: fpext01:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %ext = extractelement <4 x float> %a1, i32 0
+  %cvt = fpext float %ext to double
+  %res = insertelement <2 x double> %a0, double %cvt, i32 0
+  ret <2 x double> %res
+}
+
 define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp {
 ; CHECK-LABEL: funcA:
 ; CHECK:       # BB#0:
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index 5e50a3aef2f..c968f16678a 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -448,6 +448,17 @@ define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) {
   ret <4 x float> %c
 }
 
+define <4 x float> @fptrunc03(<2 x double> %a0, <4 x float> %a1) nounwind {
+; ALL-LABEL: fptrunc03:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vcvtsd2ss %xmm0, %xmm1, %xmm0
+; ALL-NEXT:    retq
+  %ext = extractelement <2 x double> %a0, i32 0
+  %cvt = fptrunc double %ext to float
+  %res = insertelement <4 x float> %a1, float %cvt, i32 0
+  ret <4 x float> %res
+}
+
 define <8 x double> @fpext00(<8 x float> %b) nounwind {
 ; ALL-LABEL: fpext00:
 ; ALL:       ## BB#0:
@@ -476,6 +487,17 @@ define <4 x double> @fpext01(<4 x float> %b, <4 x double>%b1, <4 x double>%a1) {
   ret <4 x double> %c
 }
 
+define <2 x double> @fpext02(<2 x double> %a0, <4 x float> %a1) nounwind {
+; ALL-LABEL: fpext02:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0
+; ALL-NEXT:    retq
+  %ext = extractelement <4 x float> %a1, i32 0
+  %cvt = fpext float %ext to double
+  %res = insertelement <2 x double> %a0, double %cvt, i32 0
+  ret <2 x double> %res
+}
+
 define double @funcA(i64* nocapture %e) {
 ; ALL-LABEL: funcA:
 ; ALL:       ## BB#0: ## %entry
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index 4af9758f122..972a33f13cd 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -1257,15 +1257,12 @@ define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
 define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
 ; X32-LABEL: test_mm_cvtsi32_sd:
 ; X32:       # BB#0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    cvtsi2sdl %eax, %xmm1
-; X32-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X32-NEXT:    cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_cvtsi32_sd:
 ; X64:       # BB#0:
-; X64-NEXT:    cvtsi2sdl %edi, %xmm1
-; X64-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X64-NEXT:    cvtsi2sdl %edi, %xmm0
 ; X64-NEXT:    retq
   %cvt = sitofp i32 %a1 to double
   %res = insertelement <2 x double> %a0, double %cvt, i32 0
@@ -1293,14 +1290,12 @@ define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
 define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
 ; X32-LABEL: test_mm_cvtss_sd:
 ; X32:       # BB#0:
-; X32-NEXT:    cvtss2sd %xmm1, %xmm1
-; X32-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X32-NEXT:    cvtss2sd %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_cvtss_sd:
 ; X64:       # BB#0:
-; X64-NEXT:    cvtss2sd %xmm1, %xmm1
-; X64-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X64-NEXT:    cvtss2sd %xmm1, %xmm0
 ; X64-NEXT:    retq
   %ext = extractelement <4 x float> %a1, i32 0
   %cvt = fpext float %ext to double
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index 6a81cdc490f..923af1216d0 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -4818,3 +4818,63 @@ define void @aggregate_sitofp_8i16_to_8f32(%Arguments* nocapture readonly %a0) {
  store <8 x float> %4, <8 x float>* %3, align 32
  ret void
 }
+
+define <2 x double> @sitofp_i32_to_2f64(<2 x double> %a0, i32 %a1) nounwind {
+; SSE-LABEL: sitofp_i32_to_2f64:
+; SSE:       # BB#0:
+; SSE-NEXT:    cvtsi2sdl %edi, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: sitofp_i32_to_2f64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %cvt = sitofp i32 %a1 to double
+  %res = insertelement <2 x double> %a0, double %cvt, i32 0
+  ret <2 x double> %res
+}
+
+define <4 x float> @sitofp_i32_to_4f32(<4 x float> %a0, i32 %a1) nounwind {
+; SSE-LABEL: sitofp_i32_to_4f32:
+; SSE:       # BB#0:
+; SSE-NEXT:    cvtsi2ssl %edi, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: sitofp_i32_to_4f32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %cvt = sitofp i32 %a1 to float
+  %res = insertelement <4 x float> %a0, float %cvt, i32 0
+  ret <4 x float> %res
+}
+
+define <2 x double> @sitofp_i64_to_2f64(<2 x double> %a0, i64 %a1) nounwind {
+; SSE-LABEL: sitofp_i64_to_2f64:
+; SSE:       # BB#0:
+; SSE-NEXT:    cvtsi2sdq %rdi, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: sitofp_i64_to_2f64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %cvt = sitofp i64 %a1 to double
+  %res = insertelement <2 x double> %a0, double %cvt, i32 0
+  ret <2 x double> %res
+}
+
+define <4 x float> @sitofp_i64_to_4f32(<4 x float> %a0, i64 %a1) nounwind {
+; SSE-LABEL: sitofp_i64_to_4f32:
+; SSE:       # BB#0:
+; SSE-NEXT:    cvtsi2ssq %rdi, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: sitofp_i64_to_4f32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %cvt = sitofp i64 %a1 to float
+  %res = insertelement <4 x float> %a0, float %cvt, i32 0
+  ret <4 x float> %res
+}
author	Elad Cohen <elad2.cohen@intel.com>	2017-01-11 09:11:48 +0000
committer	Elad Cohen <elad2.cohen@intel.com>	2017-01-11 09:11:48 +0000
commit	0c2601073e5d655817146296ba3f7740cfc97ce8 (patch)
tree	2e4185c4ee4ff6f6c7c845505beae0d051a8a7ea /llvm/test
parent	81d0f17055e9f0b8c1d41606fc5bfc1d9e4126ea (diff)
download	bcm5719-llvm-0c2601073e5d655817146296ba3f7740cfc97ce8.tar.gz bcm5719-llvm-0c2601073e5d655817146296ba3f7740cfc97ce8.zip