diff options
-rw-r--r-- | clang/include/clang/Basic/BuiltinsX86.def | 17 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 25 | ||||
-rw-r--r-- | clang/lib/Headers/avxintrin.h | 74 | ||||
-rw-r--r-- | clang/lib/Headers/emmintrin.h | 19 | ||||
-rw-r--r-- | clang/lib/Headers/smmintrin.h | 48 | ||||
-rw-r--r-- | clang/lib/Sema/SemaChecking.cpp | 27 | ||||
-rw-r--r-- | clang/test/CodeGen/avx-builtins.c | 34 | ||||
-rw-r--r-- | clang/test/CodeGen/sse2-builtins.c | 10 | ||||
-rw-r--r-- | clang/test/CodeGen/sse41-builtins.c | 12 | ||||
-rw-r--r-- | clang/test/CodeGen/target-features-error-2.c | 2 | ||||
-rw-r--r-- | clang/test/CodeGen/vector.c | 2 |
11 files changed, 138 insertions, 132 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 0657354bd53..04646b8fd8c 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -272,6 +272,11 @@ TARGET_BUILTIN(__builtin_ia32_packsswb128, "V16cV8sV8s", "nc", "sse2") TARGET_BUILTIN(__builtin_ia32_packssdw128, "V8sV4iV4i", "nc", "sse2") TARGET_BUILTIN(__builtin_ia32_packuswb128, "V16cV8sV8s", "nc", "sse2") TARGET_BUILTIN(__builtin_ia32_pmulhuw128, "V8sV8sV8s", "nc", "sse2") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "LLiV2LLiIi", "nc", "sse2") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v4si, "iV4iIi", "nc", "sse2") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v4sf, "fV4fIi", "nc", "sse2") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v8hi, "sV8sIi", "nc", "sse2") +TARGET_BUILTIN(__builtin_ia32_vec_set_v8hi, "V8sV8ssIi", "nc", "sse2") TARGET_BUILTIN(__builtin_ia32_addsubps, "V4fV4fV4f", "nc", "sse3") TARGET_BUILTIN(__builtin_ia32_addsubpd, "V2dV2dV2d", "nc", "sse3") @@ -387,6 +392,10 @@ TARGET_BUILTIN(__builtin_ia32_ptestc128, "iV2LLiV2LLi", "nc", "sse4.1") TARGET_BUILTIN(__builtin_ia32_ptestnzc128, "iV2LLiV2LLi", "nc", "sse4.1") TARGET_BUILTIN(__builtin_ia32_mpsadbw128, "V16cV16cV16cIc", "nc", "sse4.1") TARGET_BUILTIN(__builtin_ia32_phminposuw128, "V8sV8s", "nc", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v16qi, "cV16cIi", "nc", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_vec_set_v16qi, "V16cV16ccIi", "nc", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_vec_set_v4si, "V4iV4iiIi", "nc", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2LLiV2LLiLLiIi", "nc", "sse4.1") // SSE 4.2 TARGET_BUILTIN(__builtin_ia32_pcmpistrm128, "V16cV16cV16cIc", "nc", "sse4.2") @@ -519,6 +528,14 @@ TARGET_BUILTIN(__builtin_ia32_maskstorepd, "vV2d*V2LLiV2d", "n", "avx") TARGET_BUILTIN(__builtin_ia32_maskstoreps, "vV4f*V4iV4f", "n", "avx") TARGET_BUILTIN(__builtin_ia32_maskstorepd256, "vV4d*V4LLiV4d", "n", "avx") TARGET_BUILTIN(__builtin_ia32_maskstoreps256, "vV8f*V8iV8f", "n", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v32qi, "cV32cIi", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v16hi, "sV16sIi", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v8si, "iV8iIi", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "LLiV4LLiIi", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_set_v32qi, "V32cV32ccIi", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_set_v16hi, "V16sV16ssIi", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4LLiV4LLiLLiIi", "nc", "avx") // AVX2 TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "nc", "avx2") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 6379f53d17f..ad2e1cf3b4b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -8778,8 +8778,29 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateBitCast(BuildVector(Ops), llvm::Type::getX86_MMXTy(getLLVMContext())); case X86::BI__builtin_ia32_vec_ext_v2si: - return Builder.CreateExtractElement(Ops[0], - cast<ConstantInt>(Ops[1])->getZExtValue()); + case X86::BI__builtin_ia32_vec_ext_v16qi: + case X86::BI__builtin_ia32_vec_ext_v8hi: + case X86::BI__builtin_ia32_vec_ext_v4si: + case X86::BI__builtin_ia32_vec_ext_v4sf: + case X86::BI__builtin_ia32_vec_ext_v2di: + case X86::BI__builtin_ia32_vec_ext_v32qi: + case X86::BI__builtin_ia32_vec_ext_v16hi: + case X86::BI__builtin_ia32_vec_ext_v8si: + case X86::BI__builtin_ia32_vec_ext_v4di: + // These builtins exist so we can ensure the index is an ICE and in range. + // Otherwise we could just do this in the header file. + return Builder.CreateExtractElement(Ops[0], Ops[1]); + case X86::BI__builtin_ia32_vec_set_v16qi: + case X86::BI__builtin_ia32_vec_set_v8hi: + case X86::BI__builtin_ia32_vec_set_v4si: + case X86::BI__builtin_ia32_vec_set_v2di: + case X86::BI__builtin_ia32_vec_set_v32qi: + case X86::BI__builtin_ia32_vec_set_v16hi: + case X86::BI__builtin_ia32_vec_set_v8si: + case X86::BI__builtin_ia32_vec_set_v4di: + // These builtins exist so we can ensure the index is an ICE and in range. + // Otherwise we could just do this in the header file. + return Builder.CreateInsertElement(Ops[0], Ops[1], Ops[2]); case X86::BI_mm_setcsr: case X86::BI__builtin_ia32_ldmxcsr: { Address Tmp = CreateMemTemp(E->getArg(0)->getType()); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 64a4d94aa95..883b2e350d4 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -2015,12 +2015,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) /// element is extracted and returned. /// \returns A 32-bit integer containing the extracted 32 bits of extended /// packed data. -static __inline int __DEFAULT_FN_ATTRS -_mm256_extract_epi32(__m256i __a, const int __imm) -{ - __v8si __b = (__v8si)__a; - return __b[__imm & 7]; -} +#define _mm256_extract_epi32(X, N) \ + (int)__builtin_ia32_vec_ext_v8si((__v8si)(__m256i)(X), (int)(N)) /// Takes a [16 x i16] vector and returns the vector element value /// indexed by the immediate constant operand. @@ -2037,12 +2033,9 @@ _mm256_extract_epi32(__m256i __a, const int __imm) /// element is extracted and returned. /// \returns A 32-bit integer containing the extracted 16 bits of zero extended /// packed data. -static __inline int __DEFAULT_FN_ATTRS -_mm256_extract_epi16(__m256i __a, const int __imm) -{ - __v16hi __b = (__v16hi)__a; - return (unsigned short)__b[__imm & 15]; -} +#define _mm256_extract_epi16(X, N) \ + (int)(unsigned short)__builtin_ia32_vec_ext_v16hi((__v16hi)(__m256i)(X), \ + (int)(N)) /// Takes a [32 x i8] vector and returns the vector element value /// indexed by the immediate constant operand. @@ -2059,12 +2052,9 @@ _mm256_extract_epi16(__m256i __a, const int __imm) /// element is extracted and returned. /// \returns A 32-bit integer containing the extracted 8 bits of zero extended /// packed data. -static __inline int __DEFAULT_FN_ATTRS -_mm256_extract_epi8(__m256i __a, const int __imm) -{ - __v32qi __b = (__v32qi)__a; - return (unsigned char)__b[__imm & 31]; -} +#define _mm256_extract_epi8(X, N) \ + (int)(unsigned char)__builtin_ia32_vec_ext_v32qi((__v32qi)(__m256i)(X), \ + (int)(N)) #ifdef __x86_64__ /// Takes a [4 x i64] vector and returns the vector element value @@ -2082,12 +2072,8 @@ _mm256_extract_epi8(__m256i __a, const int __imm) /// element is extracted and returned. /// \returns A 64-bit integer containing the extracted 64 bits of extended /// packed data. -static __inline long long __DEFAULT_FN_ATTRS -_mm256_extract_epi64(__m256i __a, const int __imm) -{ - __v4di __b = (__v4di)__a; - return __b[__imm & 3]; -} +#define _mm256_extract_epi64(X, N) \ + (long long)__builtin_ia32_vec_ext_v4di((__v4di)(__m256i)(X), (int)(N)) #endif /// Takes a [8 x i32] vector and replaces the vector element value @@ -2108,13 +2094,9 @@ _mm256_extract_epi64(__m256i __a, const int __imm) /// replaced. /// \returns A copy of vector \a __a, after replacing its element indexed by /// \a __imm with \a __b. -static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_insert_epi32(__m256i __a, int __b, int const __imm) -{ - __v8si __c = (__v8si)__a; - __c[__imm & 7] = __b; - return (__m256i)__c; -} +#define _mm256_insert_epi32(X, I, N) \ + (__m256i)__builtin_ia32_vec_set_v8si((__v8si)(__m256i)(X), \ + (int)(I), (int)(N)) /// Takes a [16 x i16] vector and replaces the vector element value @@ -2135,13 +2117,9 @@ _mm256_insert_epi32(__m256i __a, int __b, int const __imm) /// replaced. /// \returns A copy of vector \a __a, after replacing its element indexed by /// \a __imm with \a __b. -static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_insert_epi16(__m256i __a, int __b, int const __imm) -{ - __v16hi __c = (__v16hi)__a; - __c[__imm & 15] = __b; - return (__m256i)__c; -} +#define _mm256_insert_epi16(X, I, N) \ + (__m256i)__builtin_ia32_vec_set_v16hi((__v16hi)(__m256i)(X), \ + (int)(I), (int)(N)) /// Takes a [32 x i8] vector and replaces the vector element value /// indexed by the immediate constant operand with a new value. Returns the @@ -2161,13 +2139,9 @@ _mm256_insert_epi16(__m256i __a, int __b, int const __imm) /// replaced. /// \returns A copy of vector \a __a, after replacing its element indexed by /// \a __imm with \a __b. -static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_insert_epi8(__m256i __a, int __b, int const __imm) -{ - __v32qi __c = (__v32qi)__a; - __c[__imm & 31] = __b; - return (__m256i)__c; -} +#define _mm256_insert_epi8(X, I, N) \ + (__m256i)__builtin_ia32_vec_set_v32qi((__v32qi)(__m256i)(X), \ + (int)(I), (int)(N)) #ifdef __x86_64__ /// Takes a [4 x i64] vector and replaces the vector element value @@ -2188,13 +2162,9 @@ _mm256_insert_epi8(__m256i __a, int __b, int const __imm) /// replaced. /// \returns A copy of vector \a __a, after replacing its element indexed by /// \a __imm with \a __b. -static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_insert_epi64(__m256i __a, long long __b, int const __imm) -{ - __v4di __c = (__v4di)__a; - __c[__imm & 3] = __b; - return (__m256i)__c; -} +#define _mm256_insert_epi64(X, I, N) \ + (__m256i)__builtin_ia32_vec_set_v4di((__v4di)(__m256i)(X), \ + (long long)(I), (int)(N)) #endif /* Conversion */ diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 1af1fcb377d..605fb7993fd 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -4304,12 +4304,9 @@ _mm_packus_epi16(__m128i __a, __m128i __b) /// 111: assign values from bits [127:112] of \a __a. /// \returns An integer, whose lower 16 bits are selected from the 128-bit /// integer vector parameter and the remaining bits are assigned zeros. -static __inline__ int __DEFAULT_FN_ATTRS -_mm_extract_epi16(__m128i __a, int __imm) -{ - __v8hi __b = (__v8hi)__a; - return (unsigned short)__b[__imm & 7]; -} +#define _mm_extract_epi16(a, imm) \ + (int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \ + (int)(imm)) /// Constructs a 128-bit integer vector by first making a copy of the /// 128-bit integer vector parameter, and then inserting the lower 16 bits @@ -4331,13 +4328,9 @@ _mm_extract_epi16(__m128i __a, int __imm) /// An immediate value specifying the bit offset in the result at which the /// lower 16 bits of \a __b are written. /// \returns A 128-bit integer vector containing the constructed values. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_insert_epi16(__m128i __a, int __b, int __imm) -{ - __v8hi __c = (__v8hi)__a; - __c[__imm & 7] = __b; - return (__m128i)__c; -} +#define _mm_insert_epi16(a, b, imm) \ + (__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \ + (int)(imm)) /// Copies the values of the most significant bits from each 8-bit /// element in a 128-bit integer vector of [16 x i8] to create a 16-bit mask diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 494e21bea3c..b3084562a0b 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -893,15 +893,14 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /// 11: Bits [127:96] of parameter \a X are returned. /// \returns A 32-bit integer containing the extracted 32 bits of float data. #define _mm_extract_ps(X, N) (__extension__ \ - ({ union { int __i; float __f; } __t; \ - __v4sf __a = (__v4sf)(__m128)(X); \ - __t.__f = __a[(N) & 3]; \ - __t.__i;})) + ({ union { int __i; float __f; } __t; \ + __t.__f = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); \ + __t.__i;})) /* Miscellaneous insert and extract macros. */ /* Extract a single-precision float from X at index N into D. */ -#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __v4sf __a = (__v4sf)(X); \ - (D) = __a[N]; })) +#define _MM_EXTRACT_FLOAT(D, X, N) \ + { (D) = __builtin_ia32_vec_ext_v4sf((__v4sf)(__m128)(X), (int)(N)); } /* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create an index suitable for _mm_insert_ps. */ @@ -952,10 +951,9 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /// 1110: Bits [119:112] of the result are used for insertion. \n /// 1111: Bits [127:120] of the result are used for insertion. /// \returns A 128-bit integer vector containing the constructed values. -#define _mm_insert_epi8(X, I, N) (__extension__ \ - ({ __v16qi __a = (__v16qi)(__m128i)(X); \ - __a[(N) & 15] = (I); \ - (__m128i)__a;})) +#define _mm_insert_epi8(X, I, N) \ + (__m128i)__builtin_ia32_vec_set_v16qi((__v16qi)(__m128i)(X), \ + (int)(I), (int)(N)) /// Constructs a 128-bit vector of [4 x i32] by first making a copy of /// the 128-bit integer vector parameter, and then inserting the 32-bit @@ -985,10 +983,9 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /// 10: Bits [95:64] of the result are used for insertion. \n /// 11: Bits [127:96] of the result are used for insertion. /// \returns A 128-bit integer vector containing the constructed values. -#define _mm_insert_epi32(X, I, N) (__extension__ \ - ({ __v4si __a = (__v4si)(__m128i)(X); \ - __a[(N) & 3] = (I); \ - (__m128i)__a;})) +#define _mm_insert_epi32(X, I, N) \ + (__m128i)__builtin_ia32_vec_set_v4si((__v4si)(__m128i)(X), \ + (int)(I), (int)(N)) #ifdef __x86_64__ /// Constructs a 128-bit vector of [2 x i64] by first making a copy of @@ -1017,10 +1014,9 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /// 0: Bits [63:0] of the result are used for insertion. \n /// 1: Bits [127:64] of the result are used for insertion. \n /// \returns A 128-bit integer vector containing the constructed values. -#define _mm_insert_epi64(X, I, N) (__extension__ \ - ({ __v2di __a = (__v2di)(__m128i)(X); \ - __a[(N) & 1] = (I); \ - (__m128i)__a;})) +#define _mm_insert_epi64(X, I, N) \ + (__m128i)__builtin_ia32_vec_set_v2di((__v2di)(__m128i)(X), \ + (long long)(I), (int)(N)) #endif /* __x86_64__ */ /* Extract int from packed integer array at index. This returns the element @@ -1061,9 +1057,9 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /// \returns An unsigned integer, whose lower 8 bits are selected from the /// 128-bit integer vector parameter and the remaining bits are assigned /// zeros. -#define _mm_extract_epi8(X, N) (__extension__ \ - ({ __v16qi __a = (__v16qi)(__m128i)(X); \ - (int)(unsigned char) __a[(N) & 15];})) +#define _mm_extract_epi8(X, N) \ + (int)(unsigned char)__builtin_ia32_vec_ext_v16qi((__v16qi)(__m128i)(X), \ + (int)(N)) /// Extracts a 32-bit element from the 128-bit integer vector of /// [4 x i32], using the immediate value parameter \a N as a selector. @@ -1087,9 +1083,8 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /// 11: Bits [127:96] of the parameter \a X are exracted. /// \returns An integer, whose lower 32 bits are selected from the 128-bit /// integer vector parameter and the remaining bits are assigned zeros. -#define _mm_extract_epi32(X, N) (__extension__ \ - ({ __v4si __a = (__v4si)(__m128i)(X); \ - (int)__a[(N) & 3];})) +#define _mm_extract_epi32(X, N) \ + (int)__builtin_ia32_vec_ext_v4si((__v4si)(__m128i)(X), (int)(N)) #ifdef __x86_64__ /// Extracts a 64-bit element from the 128-bit integer vector of @@ -1111,9 +1106,8 @@ _mm_max_epu32 (__m128i __V1, __m128i __V2) /// 0: Bits [63:0] are returned. \n /// 1: Bits [127:64] are returned. \n /// \returns A 64-bit integer. -#define _mm_extract_epi64(X, N) (__extension__ \ - ({ __v2di __a = (__v2di)(__m128i)(X); \ - (long long)__a[(N) & 1];})) +#define _mm_extract_epi64(X, N) \ + (long long)__builtin_ia32_vec_ext_v2di((__v2di)(__m128i)(X), (int)(N)) #endif /* __x86_64 */ /* SSE4 128-bit Packed Integer Comparisons. */ diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 643c103c8a3..b4fdcc0f7e9 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2552,17 +2552,28 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { switch (BuiltinID) { default: return false; - case X86::BI_mm_prefetch: - i = 1; l = 0; u = 7; - break; case X86::BI__builtin_ia32_vec_ext_v2si: + case X86::BI__builtin_ia32_vec_ext_v2di: i = 1; l = 0; u = 1; break; + case X86::BI__builtin_ia32_vec_set_v2di: + i = 2; l = 0; u = 1; + break; case X86::BI__builtin_ia32_vec_ext_v4hi: + case X86::BI__builtin_ia32_vec_ext_v4si: + case X86::BI__builtin_ia32_vec_ext_v4sf: + case X86::BI__builtin_ia32_vec_ext_v4di: i = 1; l = 0; u = 3; break; + case X86::BI_mm_prefetch: + case X86::BI__builtin_ia32_vec_ext_v8hi: + case X86::BI__builtin_ia32_vec_ext_v8si: + i = 1; l = 0; u = 7; + break; case X86::BI__builtin_ia32_sha1rnds4: case X86::BI__builtin_ia32_vec_set_v4hi: + case X86::BI__builtin_ia32_vec_set_v4si: + case X86::BI__builtin_ia32_vec_set_v4di: i = 2; l = 0; u = 3; break; case X86::BI__builtin_ia32_vpermil2pd: @@ -2603,12 +2614,16 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_vpcomw: case X86::BI__builtin_ia32_vpcomd: case X86::BI__builtin_ia32_vpcomq: + case X86::BI__builtin_ia32_vec_set_v8hi: + case X86::BI__builtin_ia32_vec_set_v8si: i = 2; l = 0; u = 7; break; case X86::BI__builtin_ia32_roundps: case X86::BI__builtin_ia32_roundpd: case X86::BI__builtin_ia32_roundps256: case X86::BI__builtin_ia32_roundpd256: + case X86::BI__builtin_ia32_vec_ext_v16qi: + case X86::BI__builtin_ia32_vec_ext_v16hi: i = 1; l = 0; u = 15; break; case X86::BI__builtin_ia32_roundss: @@ -2621,8 +2636,13 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_rangeps512_mask: case X86::BI__builtin_ia32_getmantsd_round_mask: case X86::BI__builtin_ia32_getmantss_round_mask: + case X86::BI__builtin_ia32_vec_set_v16qi: + case X86::BI__builtin_ia32_vec_set_v16hi: i = 2; l = 0; u = 15; break; + case X86::BI__builtin_ia32_vec_ext_v32qi: + i = 1; l = 0; u = 31; + break; case X86::BI__builtin_ia32_cmpps: case X86::BI__builtin_ia32_cmpss: case X86::BI__builtin_ia32_cmppd: @@ -2637,6 +2657,7 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_cmppd512_mask: case X86::BI__builtin_ia32_cmpsd_mask: case X86::BI__builtin_ia32_cmpss_mask: + case X86::BI__builtin_ia32_vec_set_v32qi: i = 2; l = 0; u = 31; break; case X86::BI__builtin_ia32_vcvtps2ph: diff --git a/clang/test/CodeGen/avx-builtins.c b/clang/test/CodeGen/avx-builtins.c index 4c904ee71fc..43c502f47f1 100644 --- a/clang/test/CodeGen/avx-builtins.c +++ b/clang/test/CodeGen/avx-builtins.c @@ -316,32 +316,28 @@ __m256 test_mm256_dp_ps(__m256 A, __m256 B) { int test_mm256_extract_epi8(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi8 - // CHECK: and i32 %{{.*}}, 31 - // CHECK: extractelement <32 x i8> %{{.*}}, i32 %{{.*}} + // CHECK: extractelement <32 x i8> %{{.*}}, i32 31 // CHECK: zext i8 %{{.*}} to i32 - return _mm256_extract_epi8(A, 32); + return _mm256_extract_epi8(A, 31); } int test_mm256_extract_epi16(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi16 - // CHECK: and i32 %{{.*}}, 15 - // CHECK: extractelement <16 x i16> %{{.*}}, i32 %{{.*}} + // CHECK: extractelement <16 x i16> %{{.*}}, i32 15 // CHECK: zext i16 %{{.*}} to i32 - return _mm256_extract_epi16(A, 16); + return _mm256_extract_epi16(A, 15); } int test_mm256_extract_epi32(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi32 - // CHECK: and i32 %{{.*}}, 7 - // CHECK: extractelement <8 x i32> %{{.*}}, i32 %{{.*}} - return _mm256_extract_epi32(A, 8); + // CHECK: extractelement <8 x i32> %{{.*}}, i32 7 + return _mm256_extract_epi32(A, 7); } long long test_mm256_extract_epi64(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi64 - // CHECK: and i32 %{{.*}}, 3 - // CHECK: extractelement <4 x i64> %{{.*}}, i32 %{{.*}} - return _mm256_extract_epi64(A, 5); + // CHECK: extractelement <4 x i64> %{{.*}}, i32 3 + return _mm256_extract_epi64(A, 3); } __m128d test_mm256_extractf128_pd(__m256d A) { @@ -400,29 +396,25 @@ __m256 test_mm256_hsub_ps(__m256 A, __m256 B) { __m256i test_mm256_insert_epi8(__m256i x, char b) { // CHECK-LABEL: test_mm256_insert_epi8 - // CHECK: and i32 %{{.*}}, 31 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 %{{.*}} - return _mm256_insert_epi8(x, b, 17); + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 14 + return _mm256_insert_epi8(x, b, 14); } __m256i test_mm256_insert_epi16(__m256i x, int b) { // CHECK-LABEL: test_mm256_insert_epi16 - // CHECK: and i32 %{{.*}}, 15 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 %{{.*}} + // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 4 return _mm256_insert_epi16(x, b, 4); } __m256i test_mm256_insert_epi32(__m256i x, int b) { // CHECK-LABEL: test_mm256_insert_epi32 - // CHECK: and i32 %{{.*}}, 7 - // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}} + // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 5 return _mm256_insert_epi32(x, b, 5); } __m256i test_mm256_insert_epi64(__m256i x, long long b) { // CHECK-LABEL: test_mm256_insert_epi64 - // CHECK: and i32 %{{.*}}, 3 - // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 %{{.*}} + // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 2 return _mm256_insert_epi64(x, b, 2); } diff --git a/clang/test/CodeGen/sse2-builtins.c b/clang/test/CodeGen/sse2-builtins.c index f7094b2fbd0..c0a44795bf4 100644 --- a/clang/test/CodeGen/sse2-builtins.c +++ b/clang/test/CodeGen/sse2-builtins.c @@ -613,17 +613,15 @@ __m128d test_mm_div_sd(__m128d A, __m128d B) { // Lowering to pextrw requires optimization. int test_mm_extract_epi16(__m128i A) { // CHECK-LABEL: test_mm_extract_epi16 - // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7 - // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]] + // CHECK: extractelement <8 x i16> %{{.*}}, i32 1 // CHECK: zext i16 %{{.*}} to i32 - return _mm_extract_epi16(A, 9); + return _mm_extract_epi16(A, 1); } __m128i test_mm_insert_epi16(__m128i A, int B) { // CHECK-LABEL: test_mm_insert_epi16 - // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7 - // CHECK: insertelement <8 x i16> %{{.*}}, i32 [[x]] - return _mm_insert_epi16(A, B, 8); + // CHECK: insertelement <8 x i16> %{{.*}}, i32 0 + return _mm_insert_epi16(A, B, 0); } void test_mm_lfence() { diff --git a/clang/test/CodeGen/sse41-builtins.c b/clang/test/CodeGen/sse41-builtins.c index a99ecdabe44..6d6e11e9622 100644 --- a/clang/test/CodeGen/sse41-builtins.c +++ b/clang/test/CodeGen/sse41-builtins.c @@ -220,20 +220,20 @@ __m128 test_mm_floor_ss(__m128 x, __m128 y) { __m128i test_mm_insert_epi8(__m128i x, char b) { // CHECK-LABEL: test_mm_insert_epi8 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 0 - return _mm_insert_epi8(x, b, 16); + // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 + return _mm_insert_epi8(x, b, 1); } __m128i test_mm_insert_epi32(__m128i x, int b) { // CHECK-LABEL: test_mm_insert_epi32 - // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 0 - return _mm_insert_epi32(x, b, 4); + // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1 + return _mm_insert_epi32(x, b, 1); } __m128i test_mm_insert_epi64(__m128i x, long long b) { // CHECK-LABEL: test_mm_insert_epi64 - // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 0 - return _mm_insert_epi64(x, b, 2); + // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 + return _mm_insert_epi64(x, b, 1); } __m128 test_mm_insert_ps(__m128 x, __m128 y) { diff --git a/clang/test/CodeGen/target-features-error-2.c b/clang/test/CodeGen/target-features-error-2.c index 683d9ab99ef..40279fb6dea 100644 --- a/clang/test/CodeGen/target-features-error-2.c +++ b/clang/test/CodeGen/target-features-error-2.c @@ -9,7 +9,7 @@ #if NEED_SSE42 int baz(__m256i a) { - return _mm256_extract_epi32(a, 3); // expected-error {{always_inline function '_mm256_extract_epi32' requires target feature 'sse4.2', but would be inlined into function 'baz' that is compiled without support for 'sse4.2'}} + return _mm256_extract_epi32(a, 3); // expected-error {{'__builtin_ia32_vec_ext_v8si' needs target feature avx}} } #endif diff --git a/clang/test/CodeGen/vector.c b/clang/test/CodeGen/vector.c index ebaea841aa8..98dd82a28a0 100644 --- a/clang/test/CodeGen/vector.c +++ b/clang/test/CodeGen/vector.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -ffreestanding -triple i386-apple-darwin9 -O1 -target-cpu core2 -debug-info-kind=limited -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -ffreestanding -triple i386-apple-darwin9 -O1 -target-cpu corei7 -debug-info-kind=limited -emit-llvm %s -o - | FileCheck %s typedef short __v4hi __attribute__ ((__vector_size__ (8))); void test1() { |