summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNico Weber <nicolasweber@gmx.de>2016-05-16 18:14:07 +0000
committerNico Weber <nicolasweber@gmx.de>2016-05-16 18:14:07 +0000
commit379a1952b37247975d2df8d23498675c9c8cc730 (patch)
tree0c7f914499c302a7ad9a1fcf6ebeff0bbea87f7e
parenta5bd2954e24be344f66fe1a019ee91cac14fced9 (diff)
downloadbcm5719-llvm-379a1952b37247975d2df8d23498675c9c8cc730.tar.gz
bcm5719-llvm-379a1952b37247975d2df8d23498675c9c8cc730.zip
[ms] Reintroduce feature guards in intrinsic headers in Microsoft mode
Visual Studio's C++ standard library headers include intrin.h, so the intrinsic headers get included a lot more often in Microsoft mode than elsewhere. The AVX512 intrinsics are a lot of code (0.7 MB, causing 30% compile time overhead for small programs including e.g. <string> and 6% compile time overhead for larger projects like e.g. v8). Since multiversioning can't be relied on in Microsoft mode (cl.exe doesn't support it), having faster compiles seems like the much better tradeoff until we have a better intrinsic story going forward (which we'll need for e.g. PR19898). Actually using intrinsics on Windows already requires the right /arch: settings, so this patch should have no big behavior change. See also thread "The intrinsics headers (especially avx512) are too big. What to do about it?" on cfe-dev. http://reviews.llvm.org/D20291 llvm-svn: 269675
-rw-r--r--clang/lib/Headers/immintrin.h82
-rw-r--r--clang/lib/Headers/x86intrin.h24
-rw-r--r--clang/test/CodeGen/ms-mm-align.c2
3 files changed, 106 insertions, 2 deletions
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index a74dad84a49..d6c6ae5e4c5 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -24,22 +24,41 @@
#ifndef __IMMINTRIN_H
#define __IMMINTRIN_H
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
#include <mmintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
#include <xmmintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
#include <emmintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
#include <pmmintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
#include <tmmintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+ (defined(__SSE4_2__) || defined(__SSE4_1__))
#include <smmintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+ (defined(__AES__) || defined(__PCLMUL__))
#include <wmmintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
#include <avxintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
#include <avx2intrin.h>
/* The 256-bit versions of functions in f16cintrin.h.
@@ -54,45 +73,90 @@ _mm256_cvtph_ps(__m128i __a)
{
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
}
+#endif /* __AVX2__ */
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
#include <bmiintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
#include <bmi2intrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
#include <lzcntintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
#include <fmaintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
#include <avx512fintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
#include <avx512vlintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
#include <avx512bwintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
#include <avx512cdintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
#include <avx512dqintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+ (defined(__AVX512VL__) && defined(__AVX512BW__))
#include <avx512vlbwintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+ (defined(__AVX512VL__) && defined(__AVX512CD__))
#include <avx512vlcdintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+ (defined(__AVX512VL__) && defined(__AVX512DQ__))
#include <avx512vldqintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
#include <avx512erintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
#include <avx512ifmaintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+ (defined(__AVX512IFMA__) && defined(__AVX512VL__))
#include <avx512ifmavlintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
#include <avx512vbmiintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+ (defined(__AVX512VBMI__) && defined(__AVX512VL__))
#include <avx512vbmivlintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
#include <avx512pfintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
#include <pkuintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
@@ -112,7 +176,9 @@ _rdrand64_step(unsigned long long *__p)
return __builtin_ia32_rdrand64_step(__p);
}
#endif
+#endif /* __RDRND__ */
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
#ifdef __x86_64__
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
@@ -162,22 +228,36 @@ _writegsbase_u64(unsigned long long __V)
return __builtin_ia32_wrgsbase64(__V);
}
#endif
+#endif /* __FSGSBASE__ */
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
#include <rtmintrin.h>
-
#include <xtestintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
#include <shaintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
#include <fxsrintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)
#include <xsaveintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
#include <xsaveoptintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
#include <xsavecintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
#include <xsavesintrin.h>
+#endif
/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
* whereas others are also available at all times. */
diff --git a/clang/lib/Headers/x86intrin.h b/clang/lib/Headers/x86intrin.h
index 4d8077e3829..6f3c8c4f3f8 100644
--- a/clang/lib/Headers/x86intrin.h
+++ b/clang/lib/Headers/x86intrin.h
@@ -28,29 +28,53 @@
#include <immintrin.h>
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__3dNOW__)
#include <mm3dnow.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
#include <bmiintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
#include <bmi2intrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
#include <lzcntintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
#include <popcntintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)
#include <rdseedintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PRFCHW__)
#include <prfchwintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE4A__)
#include <ammintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA4__)
#include <fma4intrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XOP__)
#include <xopintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__TBM__)
#include <tbmintrin.h>
+#endif
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
#include <f16cintrin.h>
+#endif
/* FIXME: LWP */
diff --git a/clang/test/CodeGen/ms-mm-align.c b/clang/test/CodeGen/ms-mm-align.c
index ae8e98086a7..49fedddc01a 100644
--- a/clang/test/CodeGen/ms-mm-align.c
+++ b/clang/test/CodeGen/ms-mm-align.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
+// RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -target-feature +sse \
// RUN: -triple i686--windows -emit-llvm %s -o - \
// RUN: | FileCheck %s -check-prefix CHECK
OpenPOWER on IntegriCloud