summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-06-06 00:24:55 +0000
committerCraig Topper <craig.topper@intel.com>2018-06-06 00:24:55 +0000
commitf3914b74c1825f4deece3317542743109dabbf56 (patch)
treec6f2e80811bbed0f4fb57c2fd9f38ff3058ff11c /clang/lib/CodeGen
parentc797bb23b857e4521562073d408be2de8d15a270 (diff)
downloadbcm5719-llvm-f3914b74c1825f4deece3317542743109dabbf56.tar.gz
bcm5719-llvm-f3914b74c1825f4deece3317542743109dabbf56.zip
[X86] Add builtins for vector element insert and extract for different 128 and 256 bit vector types. Use them to implement the extract and insert intrinsics.
Previously we were just using extended vector operations in the header file. This unfortunately allowed non-constant indices to be used with the intrinsics. This is incompatible with gcc, icc, and MSVC. It also introduces a different performance characteristic because non-constant index gets lowered to a vector store and an element sized load. By adding the builtins we can check for the index to be a constant and ensure its in range of the vector element count. User code still has the option to use extended vector operations themselves if they need non-constant indexing. llvm-svn: 334057
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp25
1 files changed, 23 insertions, 2 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 6379f53d17f..ad2e1cf3b4b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8778,8 +8778,29 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateBitCast(BuildVector(Ops),
llvm::Type::getX86_MMXTy(getLLVMContext()));
case X86::BI__builtin_ia32_vec_ext_v2si:
- return Builder.CreateExtractElement(Ops[0],
- cast<ConstantInt>(Ops[1])->getZExtValue());
+ case X86::BI__builtin_ia32_vec_ext_v16qi:
+ case X86::BI__builtin_ia32_vec_ext_v8hi:
+ case X86::BI__builtin_ia32_vec_ext_v4si:
+ case X86::BI__builtin_ia32_vec_ext_v4sf:
+ case X86::BI__builtin_ia32_vec_ext_v2di:
+ case X86::BI__builtin_ia32_vec_ext_v32qi:
+ case X86::BI__builtin_ia32_vec_ext_v16hi:
+ case X86::BI__builtin_ia32_vec_ext_v8si:
+ case X86::BI__builtin_ia32_vec_ext_v4di:
+ // These builtins exist so we can ensure the index is an ICE and in range.
+ // Otherwise we could just do this in the header file.
+ return Builder.CreateExtractElement(Ops[0], Ops[1]);
+ case X86::BI__builtin_ia32_vec_set_v16qi:
+ case X86::BI__builtin_ia32_vec_set_v8hi:
+ case X86::BI__builtin_ia32_vec_set_v4si:
+ case X86::BI__builtin_ia32_vec_set_v2di:
+ case X86::BI__builtin_ia32_vec_set_v32qi:
+ case X86::BI__builtin_ia32_vec_set_v16hi:
+ case X86::BI__builtin_ia32_vec_set_v8si:
+ case X86::BI__builtin_ia32_vec_set_v4di:
+ // These builtins exist so we can ensure the index is an ICE and in range.
+ // Otherwise we could just do this in the header file.
+ return Builder.CreateInsertElement(Ops[0], Ops[1], Ops[2]);
case X86::BI_mm_setcsr:
case X86::BI__builtin_ia32_ldmxcsr: {
Address Tmp = CreateMemTemp(E->getArg(0)->getType());
OpenPOWER on IntegriCloud