[X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR

Both the (V)CVTDQ2PD(Y) (i32 to f64) and (V)CVTPS2PD(Y) (f32 to f64) conversion instructions are lossless and can be safely represented as generic __builtin_convertvector calls instead of x86 intrinsics without affecting final codegen. This patch removes the clang builtins and their use in the sse2/avx headers - a future patch will deal with removing the llvm intrinsics, but that will require a bit more work. Differential Revision: http://reviews.llvm.org/D20528 llvm-svn: 270499
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-05-23 22:13:02 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-05-23 22:13:02 +0000
commit: 90770c7c76574b72c5807c5af87bfc6bec7078d4 (patch)
tree: 12fdc931d2696a1ca603047445e916f5e088be1a /clang/test
parent: adcaef72382e367dcc6a12ab3980baa60b162489 (diff)
download: bcm5719-llvm-90770c7c76574b72c5807c5af87bfc6bec7078d4.tar.gz
bcm5719-llvm-90770c7c76574b72c5807c5af87bfc6bec7078d4.zip
4 files changed, 9 insertions, 11 deletions
diff --git a/clang/test/CodeGen/avx-builtins.c b/clang/test/CodeGen/avx-builtins.c
index ad187e805ca..9558421191f 100644
--- a/clang/test/CodeGen/avx-builtins.c
+++ b/clang/test/CodeGen/avx-builtins.c
@@ -250,7 +250,7 @@ __m128 test_mm_cmp_ss(__m128 A, __m128 B) {
 
 __m256d test_mm256_cvtepi32_pd(__m128i A) {
   // CHECK-LABEL: test_mm256_cvtepi32_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %{{.*}})
+  // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double>
   return _mm256_cvtepi32_pd(A);
 }
 
@@ -280,7 +280,7 @@ __m256i test_mm256_cvtps_epi32(__m256 A) {
 
 __m256d test_mm256_cvtps_pd(__m128 A) {
   // CHECK-LABEL: test_mm256_cvtps_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %{{.*}})
+  // CHECK: fpext <4 x float> %{{.*}} to <4 x double>
   return _mm256_cvtps_pd(A);
 }
 
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index 67084b4b654..8b53c631544 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -325,7 +325,6 @@ void f0() {
   tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c);
   tmp_V2d = __builtin_ia32_sqrtpd(tmp_V2d);
   tmp_V2d = __builtin_ia32_sqrtsd(tmp_V2d);
-  tmp_V2d = __builtin_ia32_cvtdq2pd(tmp_V4i);
   tmp_V4f = __builtin_ia32_cvtdq2ps(tmp_V4i);
   tmp_V2LLi = __builtin_ia32_cvtpd2dq(tmp_V2d);
   tmp_V2i = __builtin_ia32_cvtpd2pi(tmp_V2d);
@@ -338,7 +337,6 @@ void f0() {
   tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d);
 #endif
   tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f);
-  tmp_V2d = __builtin_ia32_cvtps2pd(tmp_V4f);
   tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f);
   (void) __builtin_ia32_clflush(tmp_vCp);
   (void) __builtin_ia32_lfence();
@@ -423,11 +421,9 @@ void f0() {
   tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7);
   tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0);
   tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0);
-  tmp_V4d = __builtin_ia32_cvtdq2pd256(tmp_V4i);
   tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);
   tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);
   tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f);
-  tmp_V4d = __builtin_ia32_cvtps2pd256(tmp_V4f);
   tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d);
   tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d);
   tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f);
diff --git a/clang/test/CodeGen/sse2-builtins.c b/clang/test/CodeGen/sse2-builtins.c
index 9424c3316c9..3a9b6401807 100644
--- a/clang/test/CodeGen/sse2-builtins.c
+++ b/clang/test/CodeGen/sse2-builtins.c
@@ -415,7 +415,8 @@ int test_mm_comineq_sd(__m128d A, __m128d B) {
 
 __m128d test_mm_cvtepi32_pd(__m128i A) {
   // CHECK-LABEL: test_mm_cvtepi32_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %{{.*}})
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1>
+  // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
   return _mm_cvtepi32_pd(A);
 }
 
@@ -445,7 +446,8 @@ __m128i test_mm_cvtps_epi32(__m128 A) {
 
 __m128d test_mm_cvtps_pd(__m128 A) {
   // CHECK-LABEL: test_mm_cvtps_pd
-  // CHECK: call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %{{.*}})
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
+  // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
   return _mm_cvtps_pd(A);
 }
 
diff --git a/clang/test/CodeGen/target-builtin-error-2.c b/clang/test/CodeGen/target-builtin-error-2.c
index 949f2cc7846..2e2691a784e 100644
--- a/clang/test/CodeGen/target-builtin-error-2.c
+++ b/clang/test/CodeGen/target-builtin-error-2.c
@@ -5,9 +5,9 @@
 
 // Since we do code generation on a function level this needs to error out since
 // the subtarget feature won't be available.
-__m256d wombat(__m128i a) {
+__m128 wombat(__m128i a) {
   if (__builtin_cpu_supports("avx"))
-    return __builtin_ia32_cvtdq2pd256((__v4si)a); // expected-error {{'__builtin_ia32_cvtdq2pd256' needs target feature avx}}
+    return __builtin_ia32_vpermilvarps((__v4sf) {0.0f, 1.0f, 2.0f, 3.0f}, (__v4si)a); // expected-error {{'__builtin_ia32_vpermilvarps' needs target feature avx}}
   else
-    return (__m256d){0, 0, 0, 0};
+    return (__m128){0, 0};
 }
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2016-05-23 22:13:02 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2016-05-23 22:13:02 +0000
commit	90770c7c76574b72c5807c5af87bfc6bec7078d4 (patch)
tree	12fdc931d2696a1ca603047445e916f5e088be1a /clang/test
parent	adcaef72382e367dcc6a12ab3980baa60b162489 (diff)
download	bcm5719-llvm-90770c7c76574b72c5807c5af87bfc6bec7078d4.tar.gz bcm5719-llvm-90770c7c76574b72c5807c5af87bfc6bec7078d4.zip