summary | refs | log | tree | commit | diff | stats
path: root/clang/lib
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib')
-rw-r--r-- clang/lib/Basic/Targets/OSTargets.h 3
-rw-r--r-- clang/lib/Basic/Targets/X86.h 7
-rw-r--r-- clang/lib/CodeGen/CGBuiltin.cpp 37
-rw-r--r-- clang/lib/Headers/avx512fintrin.h 42
-rw-r--r-- clang/lib/Headers/avxintrin.h 37
5 files changed, 68 insertions, 58 deletions
diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h
index 6cc3c095557..18239a2ec77 100644
--- a/clang/lib/Basic/Targets/OSTargets.h
+++ b/clang/lib/Basic/Targets/OSTargets.h
@@ -113,9 +113,6 @@ public:
}
this->MCountName = "\01mcount";
-
- // Cap vector alignment at 16 bytes for all Darwin platforms.
- this->MaxVectorAlign = 128;
}
std::string isValidSectionSpecifier(StringRef SR) const override {
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 9d277e94cc0..5b862face9b 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -421,6 +421,7 @@ public:
LongDoubleWidth = 128;
LongDoubleAlign = 128;
SuitableAlign = 128;
+ MaxVectorAlign = 256;
// The watchOS simulator uses the builtin bool type for Objective-C.
llvm::Triple T = llvm::Triple(Triple);
if (T.isWatchOS())
@@ -436,6 +437,9 @@ public:
if (!DarwinTargetInfo<X86_32TargetInfo>::handleTargetFeatures(Features,
Diags))
return false;
+ // We now know the features we have: we can decide how to align vectors.
+ MaxVectorAlign =
+ hasFeature("avx512f") ? 512 : hasFeature("avx") ? 256 : 128;
return true;
}
};
@@ -798,6 +802,9 @@ public:
if (!DarwinTargetInfo<X86_64TargetInfo>::handleTargetFeatures(Features,
Diags))
return false;
+ // We now know the features we have: we can decide how to align vectors.
+ MaxVectorAlign =
+ hasFeature("avx512f") ? 512 : hasFeature("avx") ? 256 : 128;
return true;
}
};
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index dd0e3510753..8793f83e293 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8885,20 +8885,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_movdqa64store128_mask:
case X86::BI__builtin_ia32_storeaps128_mask:
case X86::BI__builtin_ia32_storeapd128_mask:
- return EmitX86MaskedStore(*this, Ops, 16);
-
case X86::BI__builtin_ia32_movdqa32store256_mask:
case X86::BI__builtin_ia32_movdqa64store256_mask:
case X86::BI__builtin_ia32_storeaps256_mask:
case X86::BI__builtin_ia32_storeapd256_mask:
- return EmitX86MaskedStore(*this, Ops, 32);
-
case X86::BI__builtin_ia32_movdqa32store512_mask:
case X86::BI__builtin_ia32_movdqa64store512_mask:
case X86::BI__builtin_ia32_storeaps512_mask:
- case X86::BI__builtin_ia32_storeapd512_mask:
- return EmitX86MaskedStore(*this, Ops, 64);
-
+ case X86::BI__builtin_ia32_storeapd512_mask: {
+ unsigned Align =
+ getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
+ return EmitX86MaskedStore(*this, Ops, Align);
+ }
case X86::BI__builtin_ia32_loadups128_mask:
case X86::BI__builtin_ia32_loadups256_mask:
case X86::BI__builtin_ia32_loadups512_mask:
@@ -8919,25 +8917,26 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_loaddqudi512_mask:
return EmitX86MaskedLoad(*this, Ops, 1);
- case X86::BI__builtin_ia32_loadaps128_mask:
- case X86::BI__builtin_ia32_loadapd128_mask:
case X86::BI__builtin_ia32_loadss128_mask:
case X86::BI__builtin_ia32_loadsd128_mask:
- case X86::BI__builtin_ia32_movdqa32load128_mask:
- case X86::BI__builtin_ia32_movdqa64load128_mask:
- return EmitX86MaskedLoad(*this, Ops, 16);
+ return EmitX86MaskedLoad(*this, Ops, 1);
+ case X86::BI__builtin_ia32_loadaps128_mask:
case X86::BI__builtin_ia32_loadaps256_mask:
- case X86::BI__builtin_ia32_loadapd256_mask:
- case X86::BI__builtin_ia32_movdqa32load256_mask:
- case X86::BI__builtin_ia32_movdqa64load256_mask:
- return EmitX86MaskedLoad(*this, Ops, 32);
-
case X86::BI__builtin_ia32_loadaps512_mask:
+ case X86::BI__builtin_ia32_loadapd128_mask:
+ case X86::BI__builtin_ia32_loadapd256_mask:
case X86::BI__builtin_ia32_loadapd512_mask:
+ case X86::BI__builtin_ia32_movdqa32load128_mask:
+ case X86::BI__builtin_ia32_movdqa32load256_mask:
case X86::BI__builtin_ia32_movdqa32load512_mask:
- case X86::BI__builtin_ia32_movdqa64load512_mask:
- return EmitX86MaskedLoad(*this, Ops, 64);
+ case X86::BI__builtin_ia32_movdqa64load128_mask:
+ case X86::BI__builtin_ia32_movdqa64load256_mask:
+ case X86::BI__builtin_ia32_movdqa64load512_mask: {
+ unsigned Align =
+ getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
+ return EmitX86MaskedLoad(*this, Ops, Align);
+ }
case X86::BI__builtin_ia32_storehps:
case X86::BI__builtin_ia32_storelps: {
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 9f514bc41a4..2b3633e377b 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -27,22 +27,22 @@
#ifndef __AVX512FINTRIN_H
#define __AVX512FINTRIN_H
-typedef char __v64qi __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
-typedef short __v32hi __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
-typedef double __v8df __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
-typedef float __v16sf __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
-typedef long long __v8di __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
-typedef int __v16si __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
+typedef char __v64qi __attribute__((__vector_size__(64)));
+typedef short __v32hi __attribute__((__vector_size__(64)));
+typedef double __v8df __attribute__((__vector_size__(64)));
+typedef float __v16sf __attribute__((__vector_size__(64)));
+typedef long long __v8di __attribute__((__vector_size__(64)));
+typedef int __v16si __attribute__((__vector_size__(64)));
/* Unsigned types */
-typedef unsigned char __v64qu __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
-typedef unsigned short __v32hu __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
-typedef unsigned long long __v8du __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
-typedef unsigned int __v16su __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
+typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
+typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
+typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
+typedef unsigned int __v16su __attribute__((__vector_size__(64)));
-typedef float __m512 __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
-typedef double __m512d __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
-typedef long long __m512i __attribute__((__vector_size__(64))) __attribute__((__aligned__(64)));
+typedef float __m512 __attribute__((__vector_size__(64)));
+typedef double __m512d __attribute__((__vector_size__(64)));
+typedef long long __m512i __attribute__((__vector_size__(64)));
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
@@ -4810,7 +4810,7 @@ _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_pd(void *__P, __m512d __A)
{
- *(__m512d *) __P = __A;
+ *(__m512d*)__P = __A;
}
static __inline void __DEFAULT_FN_ATTRS
@@ -4823,7 +4823,7 @@ _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_ps(void *__P, __m512 __A)
{
- *(__m512 *) __P = __A;
+ *(__m512*)__P = __A;
}
static __inline void __DEFAULT_FN_ATTRS
@@ -8777,25 +8777,29 @@ _mm512_kxor (__mmask16 __A, __mmask16 __B)
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_si512 (__m512i * __P, __m512i __A)
{
- __builtin_nontemporal_store((__v8di)__A, (__v8di*)__P);
+ typedef __v8di __v8di_aligned __attribute__((aligned(64)));
+ __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_stream_load_si512 (void const *__P)
{
- return (__m512i) __builtin_nontemporal_load((const __v8di *)__P);
+ typedef __v8di __v8di_aligned __attribute__((aligned(64)));
+ return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
}
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_pd (double *__P, __m512d __A)
{
- __builtin_nontemporal_store((__v8df)__A, (__v8df*)__P);
+ typedef __v8df __v8df_aligned __attribute__((aligned(64)));
+ __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
}
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_ps (float *__P, __m512 __A)
{
- __builtin_nontemporal_store((__v16sf)__A, (__v16sf*)__P);
+ typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
+ __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 908359fd1e5..64a4d94aa95 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -28,26 +28,26 @@
#ifndef __AVXINTRIN_H
#define __AVXINTRIN_H
-typedef double __v4df __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
-typedef float __v8sf __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
-typedef long long __v4di __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
-typedef int __v8si __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
-typedef short __v16hi __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
-typedef char __v32qi __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
+typedef double __v4df __attribute__ ((__vector_size__ (32)));
+typedef float __v8sf __attribute__ ((__vector_size__ (32)));
+typedef long long __v4di __attribute__ ((__vector_size__ (32)));
+typedef int __v8si __attribute__ ((__vector_size__ (32)));
+typedef short __v16hi __attribute__ ((__vector_size__ (32)));
+typedef char __v32qi __attribute__ ((__vector_size__ (32)));
/* Unsigned types */
-typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
-typedef unsigned int __v8su __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
-typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
-typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
+typedef unsigned long long __v4du __attribute__ ((__vector_size__ (32)));
+typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));
+typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));
+typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32)));
/* We need an explicitly signed variant for char. Note that this shouldn't
* appear in the interface though. */
-typedef signed char __v32qs __attribute__((__vector_size__(32))) __attribute__((__aligned__(32)));
+typedef signed char __v32qs __attribute__((__vector_size__(32)));
-typedef float __m256 __attribute__ ((__vector_size__ (32))) __attribute__((__aligned__(32)));
-typedef double __m256d __attribute__((__vector_size__(32))) __attribute__((__aligned__(32)));
-typedef long long __m256i __attribute__((__vector_size__(32))) __attribute__((__aligned__(32)));
+typedef float __m256 __attribute__ ((__vector_size__ (32)));
+typedef double __m256d __attribute__((__vector_size__(32)));
+typedef long long __m256i __attribute__((__vector_size__(32)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx")))
@@ -3593,7 +3593,8 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_si256(__m256i *__a, __m256i __b)
{
- __builtin_nontemporal_store((__v4di)__b, (__v4di*)__a);
+ typedef __v4di __v4di_aligned __attribute__((aligned(32)));
+ __builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);
}
/// Moves double-precision values from a 256-bit vector of [4 x double]
@@ -3612,7 +3613,8 @@ _mm256_stream_si256(__m256i *__a, __m256i __b)
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_pd(double *__a, __m256d __b)
{
- __builtin_nontemporal_store((__v4df)__b, (__v4df*)__a);
+ typedef __v4df __v4df_aligned __attribute__((aligned(32)));
+ __builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);
}
/// Moves single-precision floating point values from a 256-bit vector
@@ -3632,7 +3634,8 @@ _mm256_stream_pd(double *__a, __m256d __b)
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_ps(float *__p, __m256 __a)
{
- __builtin_nontemporal_store((__v8sf)__a, (__v8sf*)__p);
+ typedef __v8sf __v8sf_aligned __attribute__((aligned(32)));
+ __builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);
}
/* Create vectors */
OpenPOWER on IntegriCloud