diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-06-01 21:46:51 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-06-01 21:46:51 +0000 |
commit | 00880511b15621a95a506c80af325cf04da57d76 (patch) | |
tree | 1e6d2e225f4dba35923b835b625cb42a74e2d118 /clang/test/CodeGen/builtins-x86.c | |
parent | d04c6851681bc9b89ba98c1644cd62d7fb719ac2 (diff) | |
download | bcm5719-llvm-00880511b15621a95a506c80af325cf04da57d76.tar.gz bcm5719-llvm-00880511b15621a95a506c80af325cf04da57d76.zip |
[X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero) f32/f64 to i32 with generic IR (clang)
The 'cvtt' truncation (round toward zero) conversions can be safely represented as generic __builtin_convertvector (fptosi) calls instead of x86 intrinsics. We already do this (implicitly) for the scalar equivalents.
Note: I looked at updating _mm_cvttpd_epi32 as well, but this still requires a lot more backend work to lower correctly (both for debug and optimized builds).
Differential Revision: http://reviews.llvm.org/D20859
llvm-svn: 271436
Diffstat (limited to 'clang/test/CodeGen/builtins-x86.c')
-rw-r--r-- | clang/test/CodeGen/builtins-x86.c | 3 |
1 files changed, 0 insertions, 3 deletions
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index 6bfff11b781..24e491c0262 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -335,7 +335,6 @@ void f0() {
   tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d);
 #endif
   tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f);
-  tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f);
   (void) __builtin_ia32_clflush(tmp_vCp);
   (void) __builtin_ia32_lfence();
   (void) __builtin_ia32_mfence();
@@ -415,9 +414,7 @@ void f0() {
   tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);
   tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);
   tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f);
-  tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d);
   tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d);
-  tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f);
   tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7);
   tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7);
   tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7);