summaryrefslogtreecommitdiffstats
path: root/clang/test
diff options
context:
space:
mode:
author: Bill Wendling <isanbard@gmail.com> 2011-05-12 19:02:15 +0000
committer: Bill Wendling <isanbard@gmail.com> 2011-05-12 19:02:15 +0000
commit: e106c3481784ecfda9cb014cb2cfea5b3d13bbc9 (patch)
tree: f258b9fd8a6a63d651fe1a1fc52cfcafd0acd20d /clang/test
parent: 3f125fe2eaab19eac991fbef562c85896b725e14 (diff)
download: bcm5719-llvm-e106c3481784ecfda9cb014cb2cfea5b3d13bbc9.tar.gz
download: bcm5719-llvm-e106c3481784ecfda9cb014cb2cfea5b3d13bbc9.zip
LLVM doesn't always optimize away the four loads from an expression like this:
(__m128){ p[0], p[1], p[2], p[3] }
which produces really bad code. This could be done in instcombine, but it's probably better to do it in the front-end instead.
<rdar://problem/9424836>
llvm-svn: 131237
Diffstat (limited to 'clang/test')
-rw-r--r-- clang/test/CodeGen/builtins-x86.c 2
1 file changed, 2 insertions, 0 deletions
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index bb63048b616..190fa55c575 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -273,6 +273,7 @@ void f0() {
#endif
tmp_V2i = __builtin_ia32_cvttps2pi(tmp_V4f);
(void) __builtin_ia32_maskmovq(tmp_V8c, tmp_V8c, tmp_cp);
+ tmp_V4f = __builtin_ia32_loadups(tmp_fCp);
(void) __builtin_ia32_storeups(tmp_fp, tmp_V4f);
(void) __builtin_ia32_storehps(tmp_V2ip, tmp_V4f);
(void) __builtin_ia32_storelps(tmp_V2ip, tmp_V4f);
@@ -290,6 +291,7 @@ void f0() {
tmp_V4f = __builtin_ia32_sqrtps(tmp_V4f);
tmp_V4f = __builtin_ia32_sqrtss(tmp_V4f);
(void) __builtin_ia32_maskmovdqu(tmp_V16c, tmp_V16c, tmp_cp);
+ tmp_V2d = __builtin_ia32_loadupd(tmp_dCp);
(void) __builtin_ia32_storeupd(tmp_dp, tmp_V2d);
tmp_i = __builtin_ia32_movmskpd(tmp_V2d);
tmp_i = __builtin_ia32_pmovmskb128(tmp_V16c);
OpenPOWER on IntegriCloud