From e106c3481784ecfda9cb014cb2cfea5b3d13bbc9 Mon Sep 17 00:00:00 2001
From: Bill Wendling <isanbard@gmail.com>
Date: Thu, 12 May 2011 19:02:15 +0000
Subject: LLVM doesn't always optimize away the four loads from this:

     (__m128){ p[0], p[1], p[2], p[3] }

which produces really bad code. This could be done in instcombine, but it's
probably better to do it in the front-end instead.
<rdar://problem/9424836>

llvm-svn: 131237
---
 clang/lib/CodeGen/CGBuiltin.cpp | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'clang/lib/CodeGen/CGBuiltin.cpp')

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 46546177ae5..494dfaeff77 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2143,6 +2143,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
     return llvm::Constant::getNullValue(ConvertType(E->getType()));
   }
+  case X86::BI__builtin_ia32_loadups:
+  case X86::BI__builtin_ia32_loadupd:
   case X86::BI__builtin_ia32_loaddqu: {
     const llvm::Type *VecTy = ConvertType(E->getType());
     const llvm::Type *IntTy = llvm::IntegerType::get(getLLVMContext(), 128);
-- 
cgit v1.2.3