summary | refs | log | tree | commit | diff | stats
path: root/clang/test/CodeGen/builtins-x86.c
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-06-01 21:46:51 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-06-01 21:46:51 +0000
commit: 00880511b15621a95a506c80af325cf04da57d76 (patch)
tree: 1e6d2e225f4dba35923b835b625cb42a74e2d118 /clang/test/CodeGen/builtins-x86.c
parent: d04c6851681bc9b89ba98c1644cd62d7fb719ac2 (diff)
download: bcm5719-llvm-00880511b15621a95a506c80af325cf04da57d76.tar.gz
download: bcm5719-llvm-00880511b15621a95a506c80af325cf04da57d76.zip
[X86][SSE] Replace (V)CVTTPS2DQ and VCVTTPD2DQ truncating (round to zero) f32/f64 to i32 with generic IR (clang)
The 'cvtt' truncation (round to zero) conversions can be safely represented as generic __builtin_convertvector (fptosi) calls instead of x86 intrinsics. We already do this (implicitly) for the scalar equivalents.

Note: I looked at updating _mm_cvttpd_epi32 as well but this still requires a lot more backend work to correctly lower (both for debug and optimized builds).

Differential Revision: http://reviews.llvm.org/D20859

llvm-svn: 271436
Diffstat (limited to 'clang/test/CodeGen/builtins-x86.c')
-rw-r--r-- clang/test/CodeGen/builtins-x86.c 3
1 file changed, 0 insertions, 3 deletions
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index 6bfff11b781..24e491c0262 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -335,7 +335,6 @@ void f0() {
tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d);
#endif
tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f);
- tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f);
(void) __builtin_ia32_clflush(tmp_vCp);
(void) __builtin_ia32_lfence();
(void) __builtin_ia32_mfence();
@@ -415,9 +414,7 @@ void f0() {
tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);
tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);
tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f);
- tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d);
tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d);
- tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f);
tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7);
tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7);
tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7);
OpenPOWER on IntegriCloud