summaryrefslogtreecommitdiffstats
path: root/clang/lib
diff options
context:
space:
mode:
authorJohn McCall <rjmccall@apple.com>2018-06-01 21:34:26 +0000
committerJohn McCall <rjmccall@apple.com>2018-06-01 21:34:26 +0000
commit280c6560317fcb73872e340621861dfd6ecb9333 (patch)
tree5633ce15ae30163a63376c4d5c4ff6b924a8fd8e /clang/lib
parentb8d861c27a1bdf8576f964a3fac533c3df7fe39a (diff)
downloadbcm5719-llvm-280c6560317fcb73872e340621861dfd6ecb9333.tar.gz
bcm5719-llvm-280c6560317fcb73872e340621861dfd6ecb9333.zip
Cap "voluntary" vector alignment at 16 for all Darwin platforms.
This fixes two major problems:

- We were not capping vector alignment as desired on 32-bit ARM.
- We were using different alignments based on the AVX settings on Intel, so we did not have a consistent ABI.

This is an ABI break, but we think we can get away with it because vectors tend to be used mostly in inline code (which is why not having a consistent ABI has not proven disastrous on Intel).

Intel's AVX types are specified as having 32-byte / 64-byte alignment, so align them explicitly instead of relying on the base ABI rule. Note that this sort of attribute is stripped from template arguments in template substitution, so there's a possibility that code templated over vectors will produce inadequately-aligned objects. The right long-term solution for this is for alignment attributes to be interpreted as true qualifiers and thus preserved in the canonical type.

llvm-svn: 333791
Diffstat (limited to 'clang/lib')
-rw-r--r--clang/lib/Basic/Targets/OSTargets.h3
-rw-r--r--clang/lib/Basic/Targets/X86.h7
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp37
-rw-r--r--clang/lib/Headers/avx512fintrin.h42
-rw-r--r--clang/lib/Headers/avxintrin.h37
5 files changed, 58 insertions(+), 68 deletions(-)
diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h
index 18239a2ec77..6cc3c095557 100644
--- a/clang/lib/Basic/Targets/OSTargets.h
+++ b/clang/lib/Basic/Targets/OSTargets.h
@@ -113,6 +113,9 @@ public:
}
this->MCountName = "\01mcount";
+
+ // Cap vector alignment at 16 bytes for all Darwin platforms.
+ this->MaxVectorAlign = 128;
}
std::string isValidSectionSpecifier(StringRef SR) const override {
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 5b862face9b..9d277e94cc0 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -421,7 +421,6 @@ public:
LongDoubleWidth = 128;
LongDoubleAlign = 128;
SuitableAlign = 128;
- MaxVectorAlign = 256;
// The watchOS simulator uses the builtin bool type for Objective-C.
llvm::Triple T = llvm::Triple(Triple);
if (T.isWatchOS())
@@ -437,9 +436,6 @@ public:
if (!DarwinTargetInfo<X86_32TargetInfo>::handleTargetFeatures(Features,
Diags))
return false;
- // We now know the features we have: we can decide how to align vectors.
- MaxVectorAlign =
- hasFeature("avx512f") ? 512 : hasFeature("avx") ? 256 : 128;
return true;
}
};
@@ -802,9 +798,6 @@ public:
if (!DarwinTargetInfo<X86_64TargetInfo>::handleTargetFeatures(Features,
Diags))
return false;
- // We now know the features we have: we can decide how to align vectors.
- MaxVectorAlign =
- hasFeature("avx512f") ? 512 : hasFeature("avx") ? 256 : 128;
return true;
}
};
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2dd9c63ffab..b0ef01e63e4 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8918,18 +8918,20 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_movdqa64store128_mask:
case X86::BI__builtin_ia32_storeaps128_mask:
case X86::BI__builtin_ia32_storeapd128_mask:
+ return EmitX86MaskedStore(*this, Ops, 16);
+
case X86::BI__builtin_ia32_movdqa32store256_mask:
case X86::BI__builtin_ia32_movdqa64store256_mask:
case X86::BI__builtin_ia32_storeaps256_mask:
case X86::BI__builtin_ia32_storeapd256_mask:
+ return EmitX86MaskedStore(*this, Ops, 32);
+
case X86::BI__builtin_ia32_movdqa32store512_mask:
case X86::BI__builtin_ia32_movdqa64store512_mask:
case X86::BI__builtin_ia32_storeaps512_mask:
- case X86::BI__builtin_ia32_storeapd512_mask: {
- unsigned Align =
- getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
- return EmitX86MaskedStore(*this, Ops, Align);
- }
+ case X86::BI__builtin_ia32_storeapd512_mask:
+ return EmitX86MaskedStore(*this, Ops, 64);
+
case X86::BI__builtin_ia32_loadups128_mask:
case X86::BI__builtin_ia32_loadups256_mask:
case X86::BI__builtin_ia32_loadups512_mask:
@@ -8950,26 +8952,25 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_loaddqudi512_mask:
return EmitX86MaskedLoad(*this, Ops, 1);
+ case X86::BI__builtin_ia32_loadaps128_mask:
+ case X86::BI__builtin_ia32_loadapd128_mask:
case X86::BI__builtin_ia32_loadss128_mask:
case X86::BI__builtin_ia32_loadsd128_mask:
- return EmitX86MaskedLoad(*this, Ops, 1);
+ case X86::BI__builtin_ia32_movdqa32load128_mask:
+ case X86::BI__builtin_ia32_movdqa64load128_mask:
+ return EmitX86MaskedLoad(*this, Ops, 16);
- case X86::BI__builtin_ia32_loadaps128_mask:
case X86::BI__builtin_ia32_loadaps256_mask:
- case X86::BI__builtin_ia32_loadaps512_mask:
- case X86::BI__builtin_ia32_loadapd128_mask:
case X86::BI__builtin_ia32_loadapd256_mask:
- case X86::BI__builtin_ia32_loadapd512_mask:
- case X86::BI__builtin_ia32_movdqa32load128_mask:
case X86::BI__builtin_ia32_movdqa32load256_mask:
- case X86::BI__builtin_ia32_movdqa32load512_mask:
- case X86::BI__builtin_ia32_movdqa64load128_mask:
case X86::BI__builtin_ia32_movdqa64load256_mask:
- case X86::BI__builtin_ia32_movdqa64load512_mask: {
- unsigned Align =
- getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
- return EmitX86MaskedLoad(*this, Ops, Align);
- }
+ return EmitX86MaskedLoad(*this, Ops, 32);
+
+ case X86::BI__builtin_ia32_loadaps512_mask:
+ case X86::BI__builtin_ia32_loadapd512_mask:
+ case X86::BI__builtin_ia32_movdqa32load512_mask:
+ case X86::BI__builtin_ia32_movdqa64load512_mask:
+ return EmitX86MaskedLoad(*this, Ops, 64);
case X86::BI__builtin_ia32_vbroadcastf128_pd256:
case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 906556a39e2..65fb5b584ea 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -27,22 +27,22 @@
#ifndef __AVX512FINTRIN_H
#define __AVX512FINTRIN_H
-typedef char __v64qi __attribute__((__vector_size__(64)));
-typedef short __v32hi __attribute__((__vector_size__(64)));
-typedef double __v8df __attribute__((__vector_size__(64)));
-typedef float __v16sf __attribute__((__vector_size__(64)));
-typedef long long __v8di __attribute__((__vector_size__(64)));
-typedef int __v16si __attribute__((__vector_size__(64)));
+typedef char __v64qi __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
+typedef short __v32hi __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
+typedef double __v8df __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
+typedef float __v16sf __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
+typedef long long __v8di __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
+typedef int __v16si __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
/* Unsigned types */
-typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
-typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
-typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
-typedef unsigned int __v16su __attribute__((__vector_size__(64)));
+typedef unsigned char __v64qu __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
+typedef unsigned short __v32hu __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
+typedef unsigned long long __v8du __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
+typedef unsigned int __v16su __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
-typedef float __m512 __attribute__((__vector_size__(64)));
-typedef double __m512d __attribute__((__vector_size__(64)));
-typedef long long __m512i __attribute__((__vector_size__(64)));
+typedef float __m512 __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
+typedef double __m512d __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
+typedef long long __m512i __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
@@ -4812,7 +4812,7 @@ _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_pd(void *__P, __m512d __A)
{
- *(__m512d*)__P = __A;
+ *(__m512d *) __P = __A;
}
static __inline void __DEFAULT_FN_ATTRS
@@ -4825,7 +4825,7 @@ _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_ps(void *__P, __m512 __A)
{
- *(__m512*)__P = __A;
+ *(__m512 *) __P = __A;
}
static __inline void __DEFAULT_FN_ATTRS
@@ -8779,29 +8779,25 @@ _mm512_kxor (__mmask16 __A, __mmask16 __B)
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_si512 (__m512i * __P, __m512i __A)
{
- typedef __v8di __v8di_aligned __attribute__((aligned(64)));
- __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
+ __builtin_nontemporal_store((__v8di)__A, (__v8di*)__P);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_stream_load_si512 (void const *__P)
{
- typedef __v8di __v8di_aligned __attribute__((aligned(64)));
- return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
+ return (__m512i) __builtin_nontemporal_load((const __v8di *)__P);
}
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_pd (double *__P, __m512d __A)
{
- typedef __v8df __v8df_aligned __attribute__((aligned(64)));
- __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
+ __builtin_nontemporal_store((__v8df)__A, (__v8df*)__P);
}
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_ps (float *__P, __m512 __A)
{
- typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
- __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
+ __builtin_nontemporal_store((__v16sf)__A, (__v16sf*)__P);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index a7e844a25e0..d1a9028c400 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -28,26 +28,26 @@
#ifndef __AVXINTRIN_H
#define __AVXINTRIN_H
-typedef double __v4df __attribute__ ((__vector_size__ (32)));
-typedef float __v8sf __attribute__ ((__vector_size__ (32)));
-typedef long long __v4di __attribute__ ((__vector_size__ (32)));
-typedef int __v8si __attribute__ ((__vector_size__ (32)));
-typedef short __v16hi __attribute__ ((__vector_size__ (32)));
-typedef char __v32qi __attribute__ ((__vector_size__ (32)));
+typedef double __v4df __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
+typedef float __v8sf __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
+typedef long long __v4di __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
+typedef int __v8si __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
+typedef short __v16hi __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
+typedef char __v32qi __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
/* Unsigned types */
-typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32)));
-typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));
-typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));
-typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32)));
+typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
+typedef unsigned int __v8su __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
+typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
+typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
/* We need an explicitly signed variant for char. Note that this shouldn't
* appear in the interface though. */
-typedef signed char __v32qs __attribute__((__vector_size__(32)));
+typedef signed char __v32qs __attribute__((__vector_size__(32))) __attribute__((__aligned__(32)));
-typedef float __m256 __attribute__ ((__vector_size__ (32)));
-typedef double __m256d __attribute__((__vector_size__(32)));
-typedef long long __m256i __attribute__((__vector_size__(32)));
+typedef float __m256 __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
+typedef double __m256d __attribute__((__vector_size__(32))) __attribute__((__aligned__(32)));
+typedef long long __m256i __attribute__((__vector_size__(32))) __attribute__((__aligned__(32)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx")))
@@ -3589,8 +3589,7 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_si256(__m256i *__a, __m256i __b)
{
- typedef __v4di __v4di_aligned __attribute__((aligned(32)));
- __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);
+ __builtin_nontemporal_store((__v4di)__b, (__v4di*)__a);
}
/// Moves double-precision values from a 256-bit vector of [4 x double]
@@ -3609,8 +3608,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b)
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_pd(double *__a, __m256d __b)
{
- typedef __v4df __v4df_aligned __attribute__((aligned(32)));
- __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);
+ __builtin_nontemporal_store((__v4df)__b, (__v4df*)__a);
}
/// Moves single-precision floating point values from a 256-bit vector
@@ -3630,8 +3628,7 @@ _mm256_stream_pd(double *__a, __m256d __b)
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_ps(float *__p, __m256 __a)
{
- typedef __v8sf __v8sf_aligned __attribute__((aligned(32)));
- __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);
+ __builtin_nontemporal_store((__v8sf)__a, (__v8sf*)__p);
}
/* Create vectors */
OpenPOWER on IntegriCloud