path: root/clang/lib/CodeGen/CGBuiltin.cpp
author     John McCall <rjmccall@apple.com>   2018-06-01 21:34:26 +0000
committer  John McCall <rjmccall@apple.com>   2018-06-01 21:34:26 +0000
commit     280c6560317fcb73872e340621861dfd6ecb9333 (patch)
tree       5633ce15ae30163a63376c4d5c4ff6b924a8fd8e /clang/lib/CodeGen/CGBuiltin.cpp
parent     b8d861c27a1bdf8576f964a3fac533c3df7fe39a (diff)
download   bcm5719-llvm-280c6560317fcb73872e340621861dfd6ecb9333.tar.gz
           bcm5719-llvm-280c6560317fcb73872e340621861dfd6ecb9333.zip
Cap "voluntary" vector alignment at 16 for all Darwin platforms.
This fixes two major problems:
- We were not capping vector alignment as desired on 32-bit ARM.
- We were using different alignments based on the AVX settings on Intel, so we did not have a consistent ABI.

This is an ABI break, but we think we can get away with it because vectors tend to be used mostly in inline code (which is why not having a consistent ABI has not proven disastrous on Intel).

Intel's AVX types are specified as having 32-byte / 64-byte alignment, so align them explicitly instead of relying on the base ABI rule. Note that this sort of attribute is stripped from template arguments in template substitution, so there's a possibility that code templated over vectors will produce inadequately-aligned objects. The right long-term solution for this is for alignment attributes to be interpreted as true qualifiers and thus preserved in the canonical type.

llvm-svn: 333791
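A minimal sketch of the user-visible effect described above, assuming a Darwin x86-64 target after this change; the type names are made up for illustration and the printed values depend on the compiler and target, so treat the expected numbers as assumptions drawn from the commit message rather than guaranteed output:

```cpp
#include <cstdio>

// Hypothetical types for illustration: "wide_vec" has no explicit alignment,
// while "avx_like" mimics how Intel's AVX-512 types carry an explicit
// aligned(64) attribute instead of relying on the base ABI rule.
typedef float  wide_vec __attribute__((vector_size(64)));
typedef double avx_like __attribute__((vector_size(64), aligned(64)));

int main() {
  // A generic vector type's "voluntary" alignment is capped at 16 bytes
  // on Darwin platforms after this change.
  std::printf("alignof(wide_vec) = %zu\n", alignof(wide_vec)); // expected: 16 on Darwin
  // Types carrying an explicit aligned attribute keep their full alignment.
  std::printf("alignof(avx_like) = %zu\n", alignof(avx_like)); // expected: 64
  return 0;
}
```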
Diffstat (limited to 'clang/lib/CodeGen/CGBuiltin.cpp')
-rw-r--r--  clang/lib/CodeGen/CGBuiltin.cpp  37
1 file changed, 19 insertions(+), 18 deletions(-)
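The hunks below replace alignments queried from the argument type with hard-coded values, since the aligned 128/256/512-bit masked load/store builtins require alignment equal to the vector width regardless of what the (now capped) type alignment reports. A rough sketch of that mapping, assuming a hypothetical helper that is not part of CGBuiltin.cpp:

```cpp
// Illustrative only: the alignment the patch now passes explicitly to
// EmitX86MaskedStore / EmitX86MaskedLoad for the "aligned" builtins
// (128-bit -> 16, 256-bit -> 32, 512-bit -> 64 bytes).
static unsigned alignmentForAlignedVectorOp(unsigned VectorWidthInBits) {
  return VectorWidthInBits / 8;
}
```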
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2dd9c63ffab..b0ef01e63e4 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8918,18 +8918,20 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_movdqa64store128_mask:
case X86::BI__builtin_ia32_storeaps128_mask:
case X86::BI__builtin_ia32_storeapd128_mask:
+ return EmitX86MaskedStore(*this, Ops, 16);
+
case X86::BI__builtin_ia32_movdqa32store256_mask:
case X86::BI__builtin_ia32_movdqa64store256_mask:
case X86::BI__builtin_ia32_storeaps256_mask:
case X86::BI__builtin_ia32_storeapd256_mask:
+ return EmitX86MaskedStore(*this, Ops, 32);
+
case X86::BI__builtin_ia32_movdqa32store512_mask:
case X86::BI__builtin_ia32_movdqa64store512_mask:
case X86::BI__builtin_ia32_storeaps512_mask:
- case X86::BI__builtin_ia32_storeapd512_mask: {
- unsigned Align =
- getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
- return EmitX86MaskedStore(*this, Ops, Align);
- }
+ case X86::BI__builtin_ia32_storeapd512_mask:
+ return EmitX86MaskedStore(*this, Ops, 64);
+
case X86::BI__builtin_ia32_loadups128_mask:
case X86::BI__builtin_ia32_loadups256_mask:
case X86::BI__builtin_ia32_loadups512_mask:
@@ -8950,26 +8952,25 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_loaddqudi512_mask:
return EmitX86MaskedLoad(*this, Ops, 1);
+ case X86::BI__builtin_ia32_loadaps128_mask:
+ case X86::BI__builtin_ia32_loadapd128_mask:
case X86::BI__builtin_ia32_loadss128_mask:
case X86::BI__builtin_ia32_loadsd128_mask:
- return EmitX86MaskedLoad(*this, Ops, 1);
+ case X86::BI__builtin_ia32_movdqa32load128_mask:
+ case X86::BI__builtin_ia32_movdqa64load128_mask:
+ return EmitX86MaskedLoad(*this, Ops, 16);
- case X86::BI__builtin_ia32_loadaps128_mask:
case X86::BI__builtin_ia32_loadaps256_mask:
- case X86::BI__builtin_ia32_loadaps512_mask:
- case X86::BI__builtin_ia32_loadapd128_mask:
case X86::BI__builtin_ia32_loadapd256_mask:
- case X86::BI__builtin_ia32_loadapd512_mask:
- case X86::BI__builtin_ia32_movdqa32load128_mask:
case X86::BI__builtin_ia32_movdqa32load256_mask:
- case X86::BI__builtin_ia32_movdqa32load512_mask:
- case X86::BI__builtin_ia32_movdqa64load128_mask:
case X86::BI__builtin_ia32_movdqa64load256_mask:
- case X86::BI__builtin_ia32_movdqa64load512_mask: {
- unsigned Align =
- getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
- return EmitX86MaskedLoad(*this, Ops, Align);
- }
+ return EmitX86MaskedLoad(*this, Ops, 32);
+
+ case X86::BI__builtin_ia32_loadaps512_mask:
+ case X86::BI__builtin_ia32_loadapd512_mask:
+ case X86::BI__builtin_ia32_movdqa32load512_mask:
+ case X86::BI__builtin_ia32_movdqa64load512_mask:
+ return EmitX86MaskedLoad(*this, Ops, 64);
case X86::BI__builtin_ia32_vbroadcastf128_pd256:
case X86::BI__builtin_ia32_vbroadcastf128_ps256: {