diffstat:
-rw-r--r--  clang/lib/Basic/Targets/OSTargets.h                   |  3
-rw-r--r--  clang/lib/Basic/Targets/X86.h                         |  7
-rw-r--r--  clang/lib/CodeGen/CGBuiltin.cpp                       | 37
-rw-r--r--  clang/lib/Headers/avx512fintrin.h                     | 42
-rw-r--r--  clang/lib/Headers/avxintrin.h                         | 37
-rw-r--r--  clang/test/CodeGen/arm-swiftcall.c                    |  4
-rw-r--r--  clang/test/CodeGen/vector-alignment.c                 | 84
-rw-r--r--  clang/test/CodeGenCXX/align-avx-complete-objects.cpp  |  6
8 files changed, 123 insertions(+), 97 deletions(-)
diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h index 18239a2ec77..6cc3c095557 100644 --- a/clang/lib/Basic/Targets/OSTargets.h +++ b/clang/lib/Basic/Targets/OSTargets.h @@ -113,6 +113,9 @@ public: } this->MCountName = "\01mcount"; + + // Cap vector alignment at 16 bytes for all Darwin platforms. + this->MaxVectorAlign = 128; } std::string isValidSectionSpecifier(StringRef SR) const override { diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 5b862face9b..9d277e94cc0 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -421,7 +421,6 @@ public: LongDoubleWidth = 128; LongDoubleAlign = 128; SuitableAlign = 128; - MaxVectorAlign = 256; // The watchOS simulator uses the builtin bool type for Objective-C. llvm::Triple T = llvm::Triple(Triple); if (T.isWatchOS()) @@ -437,9 +436,6 @@ public: if (!DarwinTargetInfo<X86_32TargetInfo>::handleTargetFeatures(Features, Diags)) return false; - // We now know the features we have: we can decide how to align vectors. - MaxVectorAlign = - hasFeature("avx512f") ? 512 : hasFeature("avx") ? 256 : 128; return true; } }; @@ -802,9 +798,6 @@ public: if (!DarwinTargetInfo<X86_64TargetInfo>::handleTargetFeatures(Features, Diags)) return false; - // We now know the features we have: we can decide how to align vectors. - MaxVectorAlign = - hasFeature("avx512f") ? 512 : hasFeature("avx") ? 
256 : 128; return true; } }; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2dd9c63ffab..b0ef01e63e4 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -8918,18 +8918,20 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_movdqa64store128_mask: case X86::BI__builtin_ia32_storeaps128_mask: case X86::BI__builtin_ia32_storeapd128_mask: + return EmitX86MaskedStore(*this, Ops, 16); + case X86::BI__builtin_ia32_movdqa32store256_mask: case X86::BI__builtin_ia32_movdqa64store256_mask: case X86::BI__builtin_ia32_storeaps256_mask: case X86::BI__builtin_ia32_storeapd256_mask: + return EmitX86MaskedStore(*this, Ops, 32); + case X86::BI__builtin_ia32_movdqa32store512_mask: case X86::BI__builtin_ia32_movdqa64store512_mask: case X86::BI__builtin_ia32_storeaps512_mask: - case X86::BI__builtin_ia32_storeapd512_mask: { - unsigned Align = - getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); - return EmitX86MaskedStore(*this, Ops, Align); - } + case X86::BI__builtin_ia32_storeapd512_mask: + return EmitX86MaskedStore(*this, Ops, 64); + case X86::BI__builtin_ia32_loadups128_mask: case X86::BI__builtin_ia32_loadups256_mask: case X86::BI__builtin_ia32_loadups512_mask: @@ -8950,26 +8952,25 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_loaddqudi512_mask: return EmitX86MaskedLoad(*this, Ops, 1); + case X86::BI__builtin_ia32_loadaps128_mask: + case X86::BI__builtin_ia32_loadapd128_mask: case X86::BI__builtin_ia32_loadss128_mask: case X86::BI__builtin_ia32_loadsd128_mask: - return EmitX86MaskedLoad(*this, Ops, 1); + case X86::BI__builtin_ia32_movdqa32load128_mask: + case X86::BI__builtin_ia32_movdqa64load128_mask: + return EmitX86MaskedLoad(*this, Ops, 16); - case X86::BI__builtin_ia32_loadaps128_mask: case X86::BI__builtin_ia32_loadaps256_mask: - case X86::BI__builtin_ia32_loadaps512_mask: - case 
X86::BI__builtin_ia32_loadapd128_mask: case X86::BI__builtin_ia32_loadapd256_mask: - case X86::BI__builtin_ia32_loadapd512_mask: - case X86::BI__builtin_ia32_movdqa32load128_mask: case X86::BI__builtin_ia32_movdqa32load256_mask: - case X86::BI__builtin_ia32_movdqa32load512_mask: - case X86::BI__builtin_ia32_movdqa64load128_mask: case X86::BI__builtin_ia32_movdqa64load256_mask: - case X86::BI__builtin_ia32_movdqa64load512_mask: { - unsigned Align = - getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); - return EmitX86MaskedLoad(*this, Ops, Align); - } + return EmitX86MaskedLoad(*this, Ops, 32); + + case X86::BI__builtin_ia32_loadaps512_mask: + case X86::BI__builtin_ia32_loadapd512_mask: + case X86::BI__builtin_ia32_movdqa32load512_mask: + case X86::BI__builtin_ia32_movdqa64load512_mask: + return EmitX86MaskedLoad(*this, Ops, 64); case X86::BI__builtin_ia32_vbroadcastf128_pd256: case X86::BI__builtin_ia32_vbroadcastf128_ps256: { diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 906556a39e2..65fb5b584ea 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -27,22 +27,22 @@ #ifndef __AVX512FINTRIN_H #define __AVX512FINTRIN_H -typedef char __v64qi __attribute__((__vector_size__(64))); -typedef short __v32hi __attribute__((__vector_size__(64))); -typedef double __v8df __attribute__((__vector_size__(64))); -typedef float __v16sf __attribute__((__vector_size__(64))); -typedef long long __v8di __attribute__((__vector_size__(64))); -typedef int __v16si __attribute__((__vector_size__(64))); +typedef char __v64qi __attribute__((__vector_size__(64))) __attribute__((__aligned__(64))); +typedef short __v32hi __attribute__((__vector_size__(64))) __attribute__((__aligned__(64))); +typedef double __v8df __attribute__((__vector_size__(64))) __attribute__((__aligned__(64))); +typedef float __v16sf __attribute__((__vector_size__(64))) __attribute__((__aligned__(64))); +typedef long long 
__v8di __attribute__((__vector_size__(64))) __attribute__((__aligned__(64))); +typedef int __v16si __attribute__((__vector_size__(64))) __attribute__((__aligned__(64))); /* Unsigned types */ -typedef unsigned char __v64qu __attribute__((__vector_size__(64))); -typedef unsigned short __v32hu __attribute__((__vector_size__(64))); -typedef unsigned long long __v8du __attribute__((__vector_size__(64))); -typedef unsigned int __v16su __attribute__((__vector_size__(64))); +typedef unsigned char __v64qu __attribute__((__vector_size__(64))) __attribute__((__aligned__(64))); +typedef unsigned short __v32hu __attribute__((__vector_size__(64))) __attribute__((__aligned__(64))); +typedef unsigned long long __v8du __attribute__((__vector_size__(64))) __attribute__((__aligned__(64))); +typedef unsigned int __v16su __attribute__((__vector_size__(64))) __attribute__((__aligned__(64))); -typedef float __m512 __attribute__((__vector_size__(64))); -typedef double __m512d __attribute__((__vector_size__(64))); -typedef long long __m512i __attribute__((__vector_size__(64))); +typedef float __m512 __attribute__((__vector_size__(64))) __attribute__((__aligned__(64))); +typedef double __m512d __attribute__((__vector_size__(64))) __attribute__((__aligned__(64))); +typedef long long __m512i __attribute__((__vector_size__(64))) __attribute__((__aligned__(64))); typedef unsigned char __mmask8; typedef unsigned short __mmask16; @@ -4812,7 +4812,7 @@ _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) static __inline void __DEFAULT_FN_ATTRS _mm512_store_pd(void *__P, __m512d __A) { - *(__m512d*)__P = __A; + *(__m512d *) __P = __A; } static __inline void __DEFAULT_FN_ATTRS @@ -4825,7 +4825,7 @@ _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A) static __inline void __DEFAULT_FN_ATTRS _mm512_store_ps(void *__P, __m512 __A) { - *(__m512*)__P = __A; + *(__m512 *) __P = __A; } static __inline void __DEFAULT_FN_ATTRS @@ -8779,29 +8779,25 @@ _mm512_kxor (__mmask16 __A, __mmask16 __B) 
static __inline__ void __DEFAULT_FN_ATTRS _mm512_stream_si512 (__m512i * __P, __m512i __A) { - typedef __v8di __v8di_aligned __attribute__((aligned(64))); - __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P); + __builtin_nontemporal_store((__v8di)__A, (__v8di*)__P); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_stream_load_si512 (void const *__P) { - typedef __v8di __v8di_aligned __attribute__((aligned(64))); - return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P); + return (__m512i) __builtin_nontemporal_load((const __v8di *)__P); } static __inline__ void __DEFAULT_FN_ATTRS _mm512_stream_pd (double *__P, __m512d __A) { - typedef __v8df __v8df_aligned __attribute__((aligned(64))); - __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P); + __builtin_nontemporal_store((__v8df)__A, (__v8df*)__P); } static __inline__ void __DEFAULT_FN_ATTRS _mm512_stream_ps (float *__P, __m512 __A) { - typedef __v16sf __v16sf_aligned __attribute__((aligned(64))); - __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P); + __builtin_nontemporal_store((__v16sf)__A, (__v16sf*)__P); } static __inline__ __m512d __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index a7e844a25e0..d1a9028c400 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -28,26 +28,26 @@ #ifndef __AVXINTRIN_H #define __AVXINTRIN_H -typedef double __v4df __attribute__ ((__vector_size__ (32))); -typedef float __v8sf __attribute__ ((__vector_size__ (32))); -typedef long long __v4di __attribute__ ((__vector_size__ (32))); -typedef int __v8si __attribute__ ((__vector_size__ (32))); -typedef short __v16hi __attribute__ ((__vector_size__ (32))); -typedef char __v32qi __attribute__ ((__vector_size__ (32))); +typedef double __v4df __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32))); +typedef float __v8sf __attribute__ ((__vector_size__ (32))) 
__attribute__((__aligned__(32))); +typedef long long __v4di __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32))); +typedef int __v8si __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32))); +typedef short __v16hi __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32))); +typedef char __v32qi __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32))); /* Unsigned types */ -typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32))); -typedef unsigned int __v8su __attribute__ ((__vector_size__ (32))); -typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32))); -typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32))); +typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32))); +typedef unsigned int __v8su __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32))); +typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32))); +typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32))); /* We need an explicitly signed variant for char. Note that this shouldn't * appear in the interface though. */ -typedef signed char __v32qs __attribute__((__vector_size__(32))); +typedef signed char __v32qs __attribute__((__vector_size__(32))) __attribute__((__aligned__(32))); -typedef float __m256 __attribute__ ((__vector_size__ (32))); -typedef double __m256d __attribute__((__vector_size__(32))); -typedef long long __m256i __attribute__((__vector_size__(32))); +typedef float __m256 __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32))); +typedef double __m256d __attribute__((__vector_size__(32))) __attribute__((__aligned__(32))); +typedef long long __m256i __attribute__((__vector_size__(32))) __attribute__((__aligned__(32))); /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"))) @@ -3589,8 +3589,7 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a) static __inline void __DEFAULT_FN_ATTRS _mm256_stream_si256(__m256i *__a, __m256i __b) { - typedef __v4di __v4di_aligned __attribute__((aligned(32))); - __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a); + __builtin_nontemporal_store((__v4di)__b, (__v4di*)__a); } /// Moves double-precision values from a 256-bit vector of [4 x double] @@ -3609,8 +3608,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b) static __inline void __DEFAULT_FN_ATTRS _mm256_stream_pd(double *__a, __m256d __b) { - typedef __v4df __v4df_aligned __attribute__((aligned(32))); - __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a); + __builtin_nontemporal_store((__v4df)__b, (__v4df*)__a); } /// Moves single-precision floating point values from a 256-bit vector @@ -3630,8 +3628,7 @@ _mm256_stream_pd(double *__a, __m256d __b) static __inline void __DEFAULT_FN_ATTRS _mm256_stream_ps(float *__p, __m256 __a) { - typedef __v8sf __v8sf_aligned __attribute__((aligned(32))); - __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p); + __builtin_nontemporal_store((__v8sf)__a, (__v8sf*)__p); } /* Create vectors */ diff --git a/clang/test/CodeGen/arm-swiftcall.c b/clang/test/CodeGen/arm-swiftcall.c index 53109a3f681..75229c40a11 100644 --- a/clang/test/CodeGen/arm-swiftcall.c +++ b/clang/test/CodeGen/arm-swiftcall.c @@ -64,8 +64,8 @@ typedef double double2 __attribute__((ext_vector_type(2))); typedef double double4 __attribute__((ext_vector_type(4))); typedef int int3 __attribute__((ext_vector_type(3))); typedef int int4 __attribute__((ext_vector_type(4))); -typedef int int5 __attribute__((ext_vector_type(5))); -typedef int int8 __attribute__((ext_vector_type(8))); +typedef int int5 __attribute__((ext_vector_type(5))) __attribute__((aligned(32))); +typedef int int8 
__attribute__((ext_vector_type(8))) __attribute__((aligned(32))); typedef char char16 __attribute__((ext_vector_type(16))); typedef short short8 __attribute__((ext_vector_type(8))); typedef long long long2 __attribute__((ext_vector_type(2))); diff --git a/clang/test/CodeGen/vector-alignment.c b/clang/test/CodeGen/vector-alignment.c index d1fd771fb7d..425819b5a0a 100644 --- a/clang/test/CodeGen/vector-alignment.c +++ b/clang/test/CodeGen/vector-alignment.c @@ -1,38 +1,68 @@ // RUN: %clang_cc1 -w -triple x86_64-apple-darwin10 \ -// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=DARWIN_SSE // RUN: %clang_cc1 -w -triple i386-apple-darwin10 \ -// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=DARWIN_SSE // RUN: %clang_cc1 -w -triple x86_64-apple-darwin10 -target-feature +avx \ -// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=DARWIN_AVX // RUN: %clang_cc1 -w -triple i386-apple-darwin10 -target-feature +avx \ -// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=DARWIN_AVX // RUN: %clang_cc1 -w -triple x86_64-apple-darwin10 -target-feature +avx512f \ -// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=DARWIN_AVX512 // RUN: %clang_cc1 -w -triple i386-apple-darwin10 -target-feature +avx512f \ -// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=DARWIN_AVX512 +// RUN: %clang_cc1 -w -triple armv7-apple-ios10 \ +// RUN: -emit-llvm -o - %s | 
FileCheck %s --check-prefix=ALL --check-prefix=DARWIN_ARM32 +// RUN: %clang_cc1 -w -triple arm64-apple-ios10 \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=DARWIN_ARM64 + +// RUN: %clang_cc1 -w -triple x86_64-pc-linux \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=GENERIC +// RUN: %clang_cc1 -w -triple i386-pc-linux \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=GENERIC +// RUN: %clang_cc1 -w -triple x86_64-pc-linux -target-feature +avx \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=GENERIC +// RUN: %clang_cc1 -w -triple i386-pc-linux -target-feature +avx \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=GENERIC +// RUN: %clang_cc1 -w -triple x86_64-pc-linux -target-feature +avx512f \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=GENERIC +// RUN: %clang_cc1 -w -triple i386-pc-linux -target-feature +avx512f \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=ALL --check-prefix=GENERIC + // rdar://11759609 // At or below target max alignment with no aligned attribute should align based // on the size of vector. 
double __attribute__((vector_size(16))) v1; -// SSE: @v1 {{.*}}, align 16 -// AVX: @v1 {{.*}}, align 16 -// AVX512: @v1 {{.*}}, align 16 +// DARWIN_SSE: @v1 {{.*}}, align 16 +// DARWIN_AVX: @v1 {{.*}}, align 16 +// DARWIN_AVX512: @v1 {{.*}}, align 16 +// DARWIN_ARM32: @v1 {{.*}}, align 16 +// DARWIN_ARM64: @v1 {{.*}}, align 16 +// GENERIC: @v1 {{.*}}, align 16 double __attribute__((vector_size(32))) v2; -// SSE: @v2 {{.*}}, align 16 -// AVX: @v2 {{.*}}, align 32 -// AVX512: @v2 {{.*}}, align 32 +// DARWIN_SSE: @v2 {{.*}}, align 16 +// DARWIN_AVX: @v2 {{.*}}, align 16 +// DARWIN_AVX512: @v2 {{.*}}, align 16 +// DARWIN_ARM32: @v2 {{.*}}, align 16 +// DARWIN_ARM64: @v2 {{.*}}, align 16 +// GENERIC: @v2 {{.*}}, align 32 // Alignment above target max alignment with no aligned attribute should align // based on the target max. double __attribute__((vector_size(64))) v3; -// SSE: @v3 {{.*}}, align 16 -// AVX: @v3 {{.*}}, align 32 -// AVX512: @v3 {{.*}}, align 64 +// DARWIN_SSE: @v3 {{.*}}, align 16 +// DARWIN_AVX: @v3 {{.*}}, align 16 +// DARWIN_AVX512: @v3 {{.*}}, align 16 +// DARWIN_ARM32: @v3 {{.*}}, align 16 +// DARWIN_ARM64: @v3 {{.*}}, align 16 +// GENERIC: @v3 {{.*}}, align 64 double __attribute__((vector_size(1024))) v4; -// SSE: @v4 {{.*}}, align 16 -// AVX: @v4 {{.*}}, align 32 -// AVX512: @v4 {{.*}}, align 64 +// DARWIN_SSE: @v4 {{.*}}, align 16 +// DARWIN_AVX: @v4 {{.*}}, align 16 +// DARWIN_AVX512: @v4 {{.*}}, align 16 +// DARWIN_ARM32: @v4 {{.*}}, align 16 +// DARWIN_ARM64: @v4 {{.*}}, align 16 +// GENERIC: @v4 {{.*}}, align 1024 // Aliged attribute should always override. double __attribute__((vector_size(16), aligned(16))) v5; @@ -46,13 +76,19 @@ double __attribute__((vector_size(32), aligned(64))) v8; // Check non-power of 2 widths. 
double __attribute__((vector_size(24))) v9; -// SSE: @v9 {{.*}}, align 16 -// AVX: @v9 {{.*}}, align 32 -// AVX512: @v9 {{.*}}, align 32 +// DARWIN_SSE: @v9 {{.*}}, align 16 +// DARWIN_AVX: @v9 {{.*}}, align 16 +// DARWIN_AVX512: @v9 {{.*}}, align 16 +// DARWIN_ARM32: @v9 {{.*}}, align 16 +// DARWIN_ARM64: @v9 {{.*}}, align 16 +// GENERIC: @v9 {{.*}}, align 32 double __attribute__((vector_size(40))) v10; -// SSE: @v10 {{.*}}, align 16 -// AVX: @v10 {{.*}}, align 32 -// AVX512: @v10 {{.*}}, align 64 +// DARWIN_SSE: @v10 {{.*}}, align 16 +// DARWIN_AVX: @v10 {{.*}}, align 16 +// DARWIN_AVX512: @v10 {{.*}}, align 16 +// DARWIN_ARM32: @v10 {{.*}}, align 16 +// DARWIN_ARM64: @v10 {{.*}}, align 16 +// GENERIC: @v10 {{.*}}, align 64 // Check non-power of 2 widths with aligned attribute. double __attribute__((vector_size(24), aligned(64))) v11; diff --git a/clang/test/CodeGenCXX/align-avx-complete-objects.cpp b/clang/test/CodeGenCXX/align-avx-complete-objects.cpp index ad4a91428d2..1271e021fcb 100644 --- a/clang/test/CodeGenCXX/align-avx-complete-objects.cpp +++ b/clang/test/CodeGenCXX/align-avx-complete-objects.cpp @@ -12,7 +12,7 @@ volatile float TestAlign(void) return r[0]; } -// CHECK: [[R:%.*]] = alloca <8 x float>, align 32 +// CHECK: [[R:%.*]] = alloca <8 x float>, align 16 // CHECK-NEXT: [[CALL:%.*]] = call i8* @_Znwm(i64 32) // CHECK-NEXT: [[ZERO:%.*]] = bitcast i8* [[CALL]] to <8 x float>* // CHECK-NEXT: store <8 x float>* [[ZERO]], <8 x float>** [[P:%.*]], align 8 @@ -22,8 +22,8 @@ volatile float TestAlign(void) // CHECK-NEXT: store volatile <8 x float> [[TWO]], <8 x float>* [[THREE]], align 16 // CHECK-NEXT: [[FOUR:%.*]] = load <8 x float>*, <8 x float>** [[P]], align 8 // CHECK-NEXT: [[FIVE:%.*]] = load volatile <8 x float>, <8 x float>* [[FOUR]], align 16 -// CHECK-NEXT: store <8 x float> [[FIVE]], <8 x float>* [[R]], align 32 -// CHECK-NEXT: [[SIX:%.*]] = load <8 x float>, <8 x float>* [[R]], align 32 +// CHECK-NEXT: store <8 x float> [[FIVE]], <8 x float>* 
[[R]], align 16 +// CHECK-NEXT: [[SIX:%.*]] = load <8 x float>, <8 x float>* [[R]], align 16 // CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[SIX]], i32 0 // CHECK-NEXT: ret float [[VECEXT]] |