diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-05-23 22:13:02 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-05-23 22:13:02 +0000 |
| commit | 90770c7c76574b72c5807c5af87bfc6bec7078d4 (patch) | |
| tree | 12fdc931d2696a1ca603047445e916f5e088be1a | |
| parent | adcaef72382e367dcc6a12ab3980baa60b162489 (diff) | |
| download | bcm5719-llvm-90770c7c76574b72c5807c5af87bfc6bec7078d4.tar.gz bcm5719-llvm-90770c7c76574b72c5807c5af87bfc6bec7078d4.zip | |
[X86][SSE] Replace lossless i32/f32 to f64 conversion intrinsics with generic IR
Both the (V)CVTDQ2PD(Y) (i32 to f64) and (V)CVTPS2PD(Y) (f32 to f64) conversion instructions are lossless, so they can be safely represented as generic __builtin_convertvector calls instead of x86 intrinsics without affecting final codegen.
This patch removes the Clang builtins and their uses in the SSE2/AVX headers (emmintrin.h and avxintrin.h). A future patch will deal with removing the LLVM intrinsics, but that will require a bit more work.
Differential Revision: http://reviews.llvm.org/D20528
llvm-svn: 270499
| -rw-r--r-- | clang/include/clang/Basic/BuiltinsX86.def | 4 | ||||
| -rw-r--r-- | clang/lib/Headers/avxintrin.h | 4 | ||||
| -rw-r--r-- | clang/lib/Headers/emmintrin.h | 6 | ||||
| -rw-r--r-- | clang/test/CodeGen/avx-builtins.c | 4 | ||||
| -rw-r--r-- | clang/test/CodeGen/builtins-x86.c | 4 | ||||
| -rw-r--r-- | clang/test/CodeGen/sse2-builtins.c | 6 | ||||
| -rw-r--r-- | clang/test/CodeGen/target-builtin-error-2.c | 6 |
7 files changed, 15 insertions, 19 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 59c7623e913..615e84aab6f 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -330,7 +330,6 @@ TARGET_BUILTIN(__builtin_ia32_movntdq, "vV2LLi*V2LLi", "", "sse2") TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "", "sse2") TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvtdq2pd, "V2dV4i", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtdq2ps, "V4fV4i", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "", "sse2") @@ -338,7 +337,6 @@ TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvtps2pd, "V2dV4f", "", "sse2") TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2") TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2") TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2") @@ -466,11 +464,9 @@ TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "", "avx") TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "", "avx") TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "", "avx") -TARGET_BUILTIN(__builtin_ia32_cvtdq2pd256, "V4dV4i", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx") -TARGET_BUILTIN(__builtin_ia32_cvtps2pd256, "V4dV4f", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", 
"", "avx") TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "", "avx") diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index a71bd7a9934..bbbe87ff0d3 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -2050,7 +2050,7 @@ _mm256_insert_epi64(__m256i __a, long long __b, int const __imm) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtepi32_pd(__m128i __a) { - return (__m256d)__builtin_ia32_cvtdq2pd256((__v4si) __a); + return (__m256d)__builtin_convertvector((__v4si)__a, __v4df); } /// \brief Converts a vector of [8 x i32] into a vector of [8 x float]. @@ -2102,7 +2102,7 @@ _mm256_cvtps_epi32(__m256 __a) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtps_pd(__m128 __a) { - return (__m256d)__builtin_ia32_cvtps2pd256((__v4sf) __a); + return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df); } static __inline __m128i __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 637948df8ad..f9a1a1a0059 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -386,13 +386,15 @@ _mm_cvtpd_ps(__m128d __a) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a) { - return __builtin_ia32_cvtps2pd((__v4sf)__a); + return (__m128d) __builtin_convertvector( + __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a) { - return __builtin_ia32_cvtdq2pd((__v4si)__a); + return (__m128d) __builtin_convertvector( + __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df); } static __inline__ __m128i __DEFAULT_FN_ATTRS diff --git a/clang/test/CodeGen/avx-builtins.c b/clang/test/CodeGen/avx-builtins.c index ad187e805ca..9558421191f 100644 --- a/clang/test/CodeGen/avx-builtins.c +++ b/clang/test/CodeGen/avx-builtins.c @@ -250,7 +250,7 @@ __m128 test_mm_cmp_ss(__m128 A, __m128 B) { __m256d test_mm256_cvtepi32_pd(__m128i A) { // CHECK-LABEL: 
test_mm256_cvtepi32_pd - // CHECK: call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %{{.*}}) + // CHECK: sitofp <4 x i32> %{{.*}} to <4 x double> return _mm256_cvtepi32_pd(A); } @@ -280,7 +280,7 @@ __m256i test_mm256_cvtps_epi32(__m256 A) { __m256d test_mm256_cvtps_pd(__m128 A) { // CHECK-LABEL: test_mm256_cvtps_pd - // CHECK: call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %{{.*}}) + // CHECK: fpext <4 x float> %{{.*}} to <4 x double> return _mm256_cvtps_pd(A); } diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c index 67084b4b654..8b53c631544 100644 --- a/clang/test/CodeGen/builtins-x86.c +++ b/clang/test/CodeGen/builtins-x86.c @@ -325,7 +325,6 @@ void f0() { tmp_V2LLi = __builtin_ia32_psadbw128(tmp_V16c, tmp_V16c); tmp_V2d = __builtin_ia32_sqrtpd(tmp_V2d); tmp_V2d = __builtin_ia32_sqrtsd(tmp_V2d); - tmp_V2d = __builtin_ia32_cvtdq2pd(tmp_V4i); tmp_V4f = __builtin_ia32_cvtdq2ps(tmp_V4i); tmp_V2LLi = __builtin_ia32_cvtpd2dq(tmp_V2d); tmp_V2i = __builtin_ia32_cvtpd2pi(tmp_V2d); @@ -338,7 +337,6 @@ void f0() { tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d); #endif tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f); - tmp_V2d = __builtin_ia32_cvtps2pd(tmp_V4f); tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f); (void) __builtin_ia32_clflush(tmp_vCp); (void) __builtin_ia32_lfence(); @@ -423,11 +421,9 @@ void f0() { tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7); tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0); tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0); - tmp_V4d = __builtin_ia32_cvtdq2pd256(tmp_V4i); tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i); tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d); tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f); - tmp_V4d = __builtin_ia32_cvtps2pd256(tmp_V4f); tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d); tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d); tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f); diff --git a/clang/test/CodeGen/sse2-builtins.c 
b/clang/test/CodeGen/sse2-builtins.c index 9424c3316c9..3a9b6401807 100644 --- a/clang/test/CodeGen/sse2-builtins.c +++ b/clang/test/CodeGen/sse2-builtins.c @@ -415,7 +415,8 @@ int test_mm_comineq_sd(__m128d A, __m128d B) { __m128d test_mm_cvtepi32_pd(__m128i A) { // CHECK-LABEL: test_mm_cvtepi32_pd - // CHECK: call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %{{.*}}) + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1> + // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double> return _mm_cvtepi32_pd(A); } @@ -445,7 +446,8 @@ __m128i test_mm_cvtps_epi32(__m128 A) { __m128d test_mm_cvtps_pd(__m128 A) { // CHECK-LABEL: test_mm_cvtps_pd - // CHECK: call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %{{.*}}) + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1> + // CHECK: fpext <2 x float> %{{.*}} to <2 x double> return _mm_cvtps_pd(A); } diff --git a/clang/test/CodeGen/target-builtin-error-2.c b/clang/test/CodeGen/target-builtin-error-2.c index 949f2cc7846..2e2691a784e 100644 --- a/clang/test/CodeGen/target-builtin-error-2.c +++ b/clang/test/CodeGen/target-builtin-error-2.c @@ -5,9 +5,9 @@ // Since we do code generation on a function level this needs to error out since // the subtarget feature won't be available. -__m256d wombat(__m128i a) { +__m128 wombat(__m128i a) { if (__builtin_cpu_supports("avx")) - return __builtin_ia32_cvtdq2pd256((__v4si)a); // expected-error {{'__builtin_ia32_cvtdq2pd256' needs target feature avx}} + return __builtin_ia32_vpermilvarps((__v4sf) {0.0f, 1.0f, 2.0f, 3.0f}, (__v4si)a); // expected-error {{'__builtin_ia32_vpermilvarps' needs target feature avx}} else - return (__m256d){0, 0, 0, 0}; + return (__m128){0, 0}; } |

