diff options
Diffstat (limited to 'clang')
-rw-r--r-- | clang/include/clang/Basic/BuiltinsX86.def | 32 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 69 | ||||
-rw-r--r-- | clang/lib/Headers/avx2intrin.h | 13 | ||||
-rw-r--r-- | clang/lib/Headers/avx512dqintrin.h | 96 | ||||
-rw-r--r-- | clang/lib/Headers/avx512fintrin.h | 92 | ||||
-rw-r--r-- | clang/lib/Headers/avx512vldqintrin.h | 26 | ||||
-rw-r--r-- | clang/lib/Headers/avx512vlintrin.h | 38 | ||||
-rw-r--r-- | clang/lib/Headers/avxintrin.h | 51 | ||||
-rw-r--r-- | clang/lib/Sema/SemaChecking.cpp | 32 | ||||
-rw-r--r-- | clang/test/CodeGen/avx-builtins.c | 28 | ||||
-rw-r--r-- | clang/test/CodeGen/avx-shuffle-builtins.c | 8 | ||||
-rw-r--r-- | clang/test/CodeGen/avx2-builtins.c | 16 | ||||
-rw-r--r-- | clang/test/CodeGen/avx512dq-builtins.c | 24 | ||||
-rw-r--r-- | clang/test/CodeGen/avx512f-builtins.c | 24 | ||||
-rw-r--r-- | clang/test/CodeGen/avx512vl-builtins.c | 12 | ||||
-rw-r--r-- | clang/test/CodeGen/avx512vldq-builtins.c | 12 |
16 files changed, 243 insertions, 330 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 7cc5e6060a0..c63289e25d5 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -491,6 +491,9 @@ TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIc", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIc", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIc", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "nc", "avx") @@ -503,6 +506,9 @@ TARGET_BUILTIN(__builtin_ia32_vpermilpd, "V2dV2dIi", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_vpermilps, "V4fV4fIi", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_vpermilpd256, "V4dV4dIi", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_vpermilps256, "V8fV8fIi", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_vinsertf128_pd256, "V4dV4dV2dIc", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_vinsertf128_ps256, "V8fV8fV4fIc", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_vinsertf128_si256, "V8iV8iV4iIc", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_sqrtpd256, "V4dV4d", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_sqrtps256, "V8fV8f", "nc", "avx") TARGET_BUILTIN(__builtin_ia32_rsqrtps256, "V8fV8f", "nc", "avx") @@ -618,6 +624,8 @@ TARGET_BUILTIN(__builtin_ia32_pblendd256, "V8iV8iV8iIi", "nc", "avx2") TARGET_BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "nc", "avx2") TARGET_BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8i", "nc", "avx2") TARGET_BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIc", "nc", "avx2") +TARGET_BUILTIN(__builtin_ia32_extract128i256, "V2LLiV4LLiIc", "nc", "avx2") +TARGET_BUILTIN(__builtin_ia32_insert128i256, "V4LLiV4LLiV2LLiIc", "nc", "avx2") TARGET_BUILTIN(__builtin_ia32_maskloadd256, "V8iV8iC*V8i", "n", "avx2") TARGET_BUILTIN(__builtin_ia32_maskloadq256, "V4LLiV4LLiC*V4LLi", "n", "avx2") TARGET_BUILTIN(__builtin_ia32_maskloadd, "V4iV4iC*V4i", "n", "avx2") @@ -927,6 +935,8 @@ TARGET_BUILTIN(__builtin_ia32_alignd128, "V4iV4iV4iIi", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_alignd256, "V8iV8iV8iIi", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_alignq128, "V2LLiV2LLiV2LLiIi", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_alignq256, "V4LLiV4LLiV4LLiIi", "nc", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_extractf64x4, "V4dV8dIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_extractf32x4, "V4fV16fIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni") TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni") @@ -1646,6 +1656,28 @@ TARGET_BUILTIN(__builtin_ia32_pmovqw128_mask, "V8sV2LLiV8sUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2LLiUc", "n", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4LLiV8sUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4LLiUc", "n", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_extractf32x8, "V8fV16fIi", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_extractf64x2_512, "V2dV8dIi", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_extracti32x8, "V8iV16iIi", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_extracti64x2_512, "V2LLiV8LLiIi", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_extracti32x4, "V4iV16iIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_extracti64x4, "V4LLiV8LLiIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_extractf64x2_256, "V2dV4dIi", "nc", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_extracti64x2_256, "V2LLiV4LLiIi", "nc", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_extractf32x4_256, "V4fV8fIi", "nc", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_extracti32x4_256, "V4iV8iIi", "nc", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_insertf32x8, "V16fV16fV8fIi", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_insertf64x2_512, "V8dV8dV2dIi", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_inserti32x8, "V16iV16iV8iIi", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_inserti64x2_512, "V8LLiV8LLiV2LLiIi", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_insertf64x4, "V8dV8dV4dIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_inserti64x4, "V8LLiV8LLiV4LLiIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_insertf64x2_256, "V4dV4dV2dIi", "nc", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_inserti64x2_256, "V4LLiV4LLiV2LLiIi", "nc", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_insertf32x4_256, "V8fV8fV4fIi", "nc", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_inserti32x4_256, "V8iV8iV4iIi", "nc", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_insertf32x4, "V16fV16fV4fIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_inserti32x4, "V16iV16iV4iIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_getmantpd128_mask, "V2dV2diV2dUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_getmantpd256_mask, "V4dV4diV4dUc", "nc", "avx512vl") TARGET_BUILTIN(__builtin_ia32_getmantps128_mask, "V4fV4fiV4fUc", "nc", "avx512vl") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 4331005cb35..025b34e809c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9235,6 +9235,75 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } + case X86::BI__builtin_ia32_vextractf128_pd256: + case X86::BI__builtin_ia32_vextractf128_ps256: + case X86::BI__builtin_ia32_vextractf128_si256: + case X86::BI__builtin_ia32_extract128i256: + case X86::BI__builtin_ia32_extractf64x4: + case X86::BI__builtin_ia32_extractf32x4: + case X86::BI__builtin_ia32_extracti64x4: + case X86::BI__builtin_ia32_extracti32x4: + case X86::BI__builtin_ia32_extractf32x8: + case X86::BI__builtin_ia32_extracti32x8: + case X86::BI__builtin_ia32_extractf32x4_256: + case X86::BI__builtin_ia32_extracti32x4_256: + case X86::BI__builtin_ia32_extractf64x2_256: + case X86::BI__builtin_ia32_extracti64x2_256: + case X86::BI__builtin_ia32_extractf64x2_512: + case X86::BI__builtin_ia32_extracti64x2_512: { + llvm::Type *DstTy = ConvertType(E->getType()); + unsigned NumElts = DstTy->getVectorNumElements(); + unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue() * NumElts; + + uint32_t Indices[16]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i + Index; + + return Builder.CreateShuffleVector(Ops[0], + UndefValue::get(Ops[0]->getType()), + makeArrayRef(Indices, NumElts), + "extract"); + } + case X86::BI__builtin_ia32_vinsertf128_pd256: + case X86::BI__builtin_ia32_vinsertf128_ps256: + case X86::BI__builtin_ia32_vinsertf128_si256: + case X86::BI__builtin_ia32_insert128i256: + case X86::BI__builtin_ia32_insertf64x4: + case X86::BI__builtin_ia32_insertf32x4: + case X86::BI__builtin_ia32_inserti64x4: + case X86::BI__builtin_ia32_inserti32x4: + case X86::BI__builtin_ia32_insertf32x8: + case X86::BI__builtin_ia32_inserti32x8: + case X86::BI__builtin_ia32_insertf32x4_256: + case X86::BI__builtin_ia32_inserti32x4_256: + case X86::BI__builtin_ia32_insertf64x2_256: + case X86::BI__builtin_ia32_inserti64x2_256: + case X86::BI__builtin_ia32_insertf64x2_512: + case X86::BI__builtin_ia32_inserti64x2_512: { + unsigned DstNumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned SrcNumElts = Ops[1]->getType()->getVectorNumElements(); + unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue() * SrcNumElts; + + uint32_t Indices[16]; + for (unsigned i = 0; i != DstNumElts; ++i) + Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i; + + Value *Op1 = Builder.CreateShuffleVector(Ops[1], + UndefValue::get(Ops[1]->getType()), + makeArrayRef(Indices, DstNumElts), + "widen"); + + for (unsigned i = 0; i != DstNumElts; ++i) { + if (i >= Index && i < (Index + SrcNumElts)) + Indices[i] = (i - Index) + DstNumElts; + else + Indices[i] = i; + } + + return Builder.CreateShuffleVector(Ops[0], Op1, + makeArrayRef(Indices, DstNumElts), + "insert"); + } case X86::BI__builtin_ia32_pblendw128: case X86::BI__builtin_ia32_blendpd: case X86::BI__builtin_ia32_blendps: diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index d1c530693bf..3867af08ccc 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -881,18 +881,11 @@ _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (M)) #define _mm256_extracti128_si256(V, M) \ - (__m128i)__builtin_shufflevector((__v4di)(__m256i)(V), \ - (__v4di)_mm256_undefined_si256(), \ - (((M) & 1) ? 2 : 0), \ - (((M) & 1) ? 3 : 1) ) + (__m128i)__builtin_ia32_extract128i256((__v4di)(__m256i)(V), (int)(M)) #define _mm256_inserti128_si256(V1, V2, M) \ - (__m256i)__builtin_shufflevector((__v4di)(__m256i)(V1), \ - (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \ - (((M) & 1) ? 0 : 4), \ - (((M) & 1) ? 1 : 5), \ - (((M) & 1) ? 4 : 2), \ - (((M) & 1) ? 5 : 3) ) + (__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \ + (__v2di)(__m128i)(V2), (int)(M)) static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskload_epi32(int const *__X, __m256i __M) diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index 9ef78041d1c..fbb4bbce8df 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -1103,16 +1103,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) } #define _mm512_extractf32x8_ps(A, imm) \ - (__m256)__builtin_shufflevector((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_undefined_ps(), \ - ((imm) & 1) ? 8 : 0, \ - ((imm) & 1) ? 9 : 1, \ - ((imm) & 1) ? 10 : 2, \ - ((imm) & 1) ? 11 : 3, \ - ((imm) & 1) ? 12 : 4, \ - ((imm) & 1) ? 13 : 5, \ - ((imm) & 1) ? 14 : 6, \ - ((imm) & 1) ? 15 : 7) + (__m256)__builtin_ia32_extractf32x8((__v16sf)(__m512)(A), (int)(imm)) #define _mm512_mask_extractf32x8_ps(W, U, A, imm) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ @@ -1125,10 +1116,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) (__v8sf)_mm256_setzero_ps()) #define _mm512_extractf64x2_pd(A, imm) \ - (__m128d)__builtin_shufflevector((__v8df)(__m512d)(A), \ - (__v8df)_mm512_undefined_pd(), \ - 0 + ((imm) & 0x3) * 2, \ - 1 + ((imm) & 0x3) * 2) + (__m128d)__builtin_ia32_extractf64x2_512((__v8df)(__m512d)(A), (int)(imm)) #define _mm512_mask_extractf64x2_pd(W, U, A, imm) \ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ @@ -1141,16 +1129,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) (__v2df)_mm_setzero_pd()) #define _mm512_extracti32x8_epi32(A, imm) \ - (__m256i)__builtin_shufflevector((__v16si)(__m512i)(A), \ - (__v16si)_mm512_undefined_epi32(), \ - ((imm) & 1) ? 8 : 0, \ - ((imm) & 1) ? 9 : 1, \ - ((imm) & 1) ? 10 : 2, \ - ((imm) & 1) ? 11 : 3, \ - ((imm) & 1) ? 12 : 4, \ - ((imm) & 1) ? 13 : 5, \ - ((imm) & 1) ? 14 : 6, \ - ((imm) & 1) ? 15 : 7) + (__m256i)__builtin_ia32_extracti32x8((__v16si)(__m512i)(A), (int)(imm)) #define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ @@ -1163,10 +1142,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) (__v8si)_mm256_setzero_si256()) #define _mm512_extracti64x2_epi64(A, imm) \ - (__m128i)__builtin_shufflevector((__v8di)(__m512i)(A), \ - (__v8di)_mm512_undefined_epi32(), \ - 0 + ((imm) & 0x3) * 2, \ - 1 + ((imm) & 0x3) * 2) + (__m128i)__builtin_ia32_extracti64x2_512((__v8di)(__m512i)(A), (int)(imm)) #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \ (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \ @@ -1179,24 +1155,8 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) (__v2di)_mm_setzero_si128()) #define _mm512_insertf32x8(A, B, imm) \ - (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_castps256_ps512((__m256)(B)),\ - ((imm) & 0x1) ? 0 : 16, \ - ((imm) & 0x1) ? 1 : 17, \ - ((imm) & 0x1) ? 2 : 18, \ - ((imm) & 0x1) ? 3 : 19, \ - ((imm) & 0x1) ? 4 : 20, \ - ((imm) & 0x1) ? 5 : 21, \ - ((imm) & 0x1) ? 6 : 22, \ - ((imm) & 0x1) ? 7 : 23, \ - ((imm) & 0x1) ? 16 : 8, \ - ((imm) & 0x1) ? 17 : 9, \ - ((imm) & 0x1) ? 18 : 10, \ - ((imm) & 0x1) ? 19 : 11, \ - ((imm) & 0x1) ? 20 : 12, \ - ((imm) & 0x1) ? 21 : 13, \ - ((imm) & 0x1) ? 22 : 14, \ - ((imm) & 0x1) ? 23 : 15) + (__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \ + (__v8sf)(__m256)(B), (int)(imm)) #define _mm512_mask_insertf32x8(W, U, A, B, imm) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ @@ -1209,16 +1169,8 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) (__v16sf)_mm512_setzero_ps()) #define _mm512_insertf64x2(A, B, imm) \ - (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ - (__v8df)_mm512_castpd128_pd512((__m128d)(B)),\ - (((imm) & 0x3) == 0) ? 8 : 0, \ - (((imm) & 0x3) == 0) ? 9 : 1, \ - (((imm) & 0x3) == 1) ? 8 : 2, \ - (((imm) & 0x3) == 1) ? 9 : 3, \ - (((imm) & 0x3) == 2) ? 8 : 4, \ - (((imm) & 0x3) == 2) ? 9 : 5, \ - (((imm) & 0x3) == 3) ? 8 : 6, \ - (((imm) & 0x3) == 3) ? 9 : 7) + (__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \ + (__v2df)(__m128d)(B), (int)(imm)) #define _mm512_mask_insertf64x2(W, U, A, B, imm) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ @@ -1231,24 +1183,8 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) (__v8df)_mm512_setzero_pd()) #define _mm512_inserti32x8(A, B, imm) \ - (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ - (__v16si)_mm512_castsi256_si512((__m256i)(B)),\ - ((imm) & 0x1) ? 0 : 16, \ - ((imm) & 0x1) ? 1 : 17, \ - ((imm) & 0x1) ? 2 : 18, \ - ((imm) & 0x1) ? 3 : 19, \ - ((imm) & 0x1) ? 4 : 20, \ - ((imm) & 0x1) ? 5 : 21, \ - ((imm) & 0x1) ? 6 : 22, \ - ((imm) & 0x1) ? 7 : 23, \ - ((imm) & 0x1) ? 16 : 8, \ - ((imm) & 0x1) ? 17 : 9, \ - ((imm) & 0x1) ? 18 : 10, \ - ((imm) & 0x1) ? 19 : 11, \ - ((imm) & 0x1) ? 20 : 12, \ - ((imm) & 0x1) ? 21 : 13, \ - ((imm) & 0x1) ? 22 : 14, \ - ((imm) & 0x1) ? 23 : 15) + (__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \ + (__v8si)(__m256i)(B), (int)(imm)) #define _mm512_mask_inserti32x8(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ @@ -1261,16 +1197,8 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) (__v16si)_mm512_setzero_si512()) #define _mm512_inserti64x2(A, B, imm) \ - (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \ - (__v8di)_mm512_castsi128_si512((__m128i)(B)),\ - (((imm) & 0x3) == 0) ? 8 : 0, \ - (((imm) & 0x3) == 0) ? 9 : 1, \ - (((imm) & 0x3) == 1) ? 8 : 2, \ - (((imm) & 0x3) == 1) ? 9 : 3, \ - (((imm) & 0x3) == 2) ? 8 : 4, \ - (((imm) & 0x3) == 2) ? 9 : 5, \ - (((imm) & 0x3) == 3) ? 8 : 6, \ - (((imm) & 0x3) == 3) ? 9 : 7) + (__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \ + (__v2di)(__m128i)(B), (int)(imm)) #define _mm512_mask_inserti64x2(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index ccc445a6acd..4ae235e6330 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -3494,12 +3494,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, /* Vector Extract */ #define _mm512_extractf64x4_pd(A, I) \ - (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A), \ - (__v8df)_mm512_undefined_pd(), \ - ((I) & 1) ? 4 : 0, \ - ((I) & 1) ? 5 : 1, \ - ((I) & 1) ? 6 : 2, \ - ((I) & 1) ? 7 : 3) + (__m256d)__builtin_ia32_extractf64x4((__v8df)(__m512d)(A), (int)(I)) #define _mm512_mask_extractf64x4_pd(W, U, A, imm) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ @@ -3512,12 +3507,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, (__v4df)_mm256_setzero_pd()) #define _mm512_extractf32x4_ps(A, I) \ - (__m128)__builtin_shufflevector((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_undefined_ps(), \ - 0 + ((I) & 0x3) * 4, \ - 1 + ((I) & 0x3) * 4, \ - 2 + ((I) & 0x3) * 4, \ - 3 + ((I) & 0x3) * 4) + (__m128)__builtin_ia32_extractf32x4((__v16sf)(__m512)(A), (int)(I)) #define _mm512_mask_extractf32x4_ps(W, U, A, imm) \ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ @@ -7544,12 +7534,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) } #define _mm512_extracti32x4_epi32(A, imm) \ - (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A), \ - (__v16si)_mm512_undefined_epi32(), \ - 0 + ((imm) & 0x3) * 4, \ - 1 + ((imm) & 0x3) * 4, \ - 2 + ((imm) & 0x3) * 4, \ - 3 + ((imm) & 0x3) * 4) + (__m128i)__builtin_ia32_extracti32x4((__v16si)(__m512i)(A), (int)(imm)) #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ @@ -7562,12 +7547,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) (__v4si)_mm_setzero_si128()) #define _mm512_extracti64x4_epi64(A, imm) \ - (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A), \ - (__v8di)_mm512_undefined_epi32(), \ - ((imm) & 1) ? 4 : 0, \ - ((imm) & 1) ? 5 : 1, \ - ((imm) & 1) ? 6 : 2, \ - ((imm) & 1) ? 7 : 3) + (__m256i)__builtin_ia32_extracti64x4((__v8di)(__m512i)(A), (int)(imm)) #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ @@ -7580,16 +7560,8 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) (__v4di)_mm256_setzero_si256()) #define _mm512_insertf64x4(A, B, imm) \ - (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ - (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \ - ((imm) & 0x1) ? 0 : 8, \ - ((imm) & 0x1) ? 1 : 9, \ - ((imm) & 0x1) ? 2 : 10, \ - ((imm) & 0x1) ? 3 : 11, \ - ((imm) & 0x1) ? 8 : 4, \ - ((imm) & 0x1) ? 9 : 5, \ - ((imm) & 0x1) ? 10 : 6, \ - ((imm) & 0x1) ? 11 : 7) + (__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \ + (__v4df)(__m256d)(B), (int)(imm)) #define _mm512_mask_insertf64x4(W, U, A, B, imm) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ @@ -7602,16 +7574,8 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) (__v8df)_mm512_setzero_pd()) #define _mm512_inserti64x4(A, B, imm) \ - (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \ - (__v8di)_mm512_castsi256_si512((__m256i)(B)), \ - ((imm) & 0x1) ? 0 : 8, \ - ((imm) & 0x1) ? 1 : 9, \ - ((imm) & 0x1) ? 2 : 10, \ - ((imm) & 0x1) ? 3 : 11, \ - ((imm) & 0x1) ? 8 : 4, \ - ((imm) & 0x1) ? 9 : 5, \ - ((imm) & 0x1) ? 10 : 6, \ - ((imm) & 0x1) ? 11 : 7) + (__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \ + (__v4di)(__m256i)(B), (int)(imm)) #define _mm512_mask_inserti64x4(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ @@ -7624,24 +7588,8 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) (__v8di)_mm512_setzero_si512()) #define _mm512_insertf32x4(A, B, imm) \ - (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_castps128_ps512((__m128)(B)),\ - (((imm) & 0x3) == 0) ? 16 : 0, \ - (((imm) & 0x3) == 0) ? 17 : 1, \ - (((imm) & 0x3) == 0) ? 18 : 2, \ - (((imm) & 0x3) == 0) ? 19 : 3, \ - (((imm) & 0x3) == 1) ? 16 : 4, \ - (((imm) & 0x3) == 1) ? 17 : 5, \ - (((imm) & 0x3) == 1) ? 18 : 6, \ - (((imm) & 0x3) == 1) ? 19 : 7, \ - (((imm) & 0x3) == 2) ? 16 : 8, \ - (((imm) & 0x3) == 2) ? 17 : 9, \ - (((imm) & 0x3) == 2) ? 18 : 10, \ - (((imm) & 0x3) == 2) ? 19 : 11, \ - (((imm) & 0x3) == 3) ? 16 : 12, \ - (((imm) & 0x3) == 3) ? 17 : 13, \ - (((imm) & 0x3) == 3) ? 18 : 14, \ - (((imm) & 0x3) == 3) ? 19 : 15) + (__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \ + (__v4sf)(__m128)(B), (int)(imm)) #define _mm512_mask_insertf32x4(W, U, A, B, imm) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ @@ -7654,24 +7602,8 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) (__v16sf)_mm512_setzero_ps()) #define _mm512_inserti32x4(A, B, imm) \ - (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ - (__v16si)_mm512_castsi128_si512((__m128i)(B)),\ - (((imm) & 0x3) == 0) ? 16 : 0, \ - (((imm) & 0x3) == 0) ? 17 : 1, \ - (((imm) & 0x3) == 0) ? 18 : 2, \ - (((imm) & 0x3) == 0) ? 19 : 3, \ - (((imm) & 0x3) == 1) ? 16 : 4, \ - (((imm) & 0x3) == 1) ? 17 : 5, \ - (((imm) & 0x3) == 1) ? 18 : 6, \ - (((imm) & 0x3) == 1) ? 19 : 7, \ - (((imm) & 0x3) == 2) ? 16 : 8, \ - (((imm) & 0x3) == 2) ? 17 : 9, \ - (((imm) & 0x3) == 2) ? 18 : 10, \ - (((imm) & 0x3) == 2) ? 19 : 11, \ - (((imm) & 0x3) == 3) ? 16 : 12, \ - (((imm) & 0x3) == 3) ? 17 : 13, \ - (((imm) & 0x3) == 3) ? 18 : 14, \ - (((imm) & 0x3) == 3) ? 19 : 15) + (__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \ + (__v4si)(__m128i)(B), (int)(imm)) #define _mm512_mask_inserti32x4(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h index 60571adb5e9..8d6ff3ec8d9 100644 --- a/clang/lib/Headers/avx512vldqintrin.h +++ b/clang/lib/Headers/avx512vldqintrin.h @@ -1083,10 +1083,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) } #define _mm256_extractf64x2_pd(A, imm) \ - (__m128d)__builtin_shufflevector((__v4df)(__m256d)(A), \ - (__v4df)_mm256_undefined_pd(), \ - ((imm) & 1) ? 2 : 0, \ - ((imm) & 1) ? 3 : 1) + (__m128d)__builtin_ia32_extractf64x2_256((__v4df)(__m256d)(A), (int)(imm)) #define _mm256_mask_extractf64x2_pd(W, U, A, imm) \ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ @@ -1099,10 +1096,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) (__v2df)_mm_setzero_pd()) #define _mm256_extracti64x2_epi64(A, imm) \ - (__m128i)__builtin_shufflevector((__v4di)(__m256i)(A), \ - (__v4di)_mm256_undefined_si256(), \ - ((imm) & 1) ? 2 : 0, \ - ((imm) & 1) ? 3 : 1) + (__m128i)__builtin_ia32_extracti64x2_256((__v4di)(__m256i)(A), (int)(imm)) #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \ (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ @@ -1115,12 +1109,8 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) (__v2di)_mm_setzero_si128()) #define _mm256_insertf64x2(A, B, imm) \ - (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \ - (__v4df)_mm256_castpd128_pd256((__m128d)(B)), \ - ((imm) & 0x1) ? 0 : 4, \ - ((imm) & 0x1) ? 1 : 5, \ - ((imm) & 0x1) ? 4 : 2, \ - ((imm) & 0x1) ? 5 : 3) + (__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \ + (__v2df)(__m128d)(B), (int)(imm)) #define _mm256_mask_insertf64x2(W, U, A, B, imm) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ @@ -1133,12 +1123,8 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) (__v4df)_mm256_setzero_pd()) #define _mm256_inserti64x2(A, B, imm) \ - (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \ - (__v4di)_mm256_castsi128_si256((__m128i)(B)), \ - ((imm) & 0x1) ? 0 : 4, \ - ((imm) & 0x1) ? 1 : 5, \ - ((imm) & 0x1) ? 4 : 2, \ - ((imm) & 0x1) ? 5 : 3) + (__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \ + (__v2di)(__m128i)(B), (int)(imm)) #define _mm256_mask_inserti64x2(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 49198ec5336..d1e81528cde 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -7699,12 +7699,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) } #define _mm256_extractf32x4_ps(A, imm) \ - (__m128)__builtin_shufflevector((__v8sf)(__m256)(A), \ - (__v8sf)_mm256_undefined_ps(), \ - ((imm) & 1) ? 4 : 0, \ - ((imm) & 1) ? 5 : 1, \ - ((imm) & 1) ? 6 : 2, \ - ((imm) & 1) ? 7 : 3) + (__m128)__builtin_ia32_extractf32x4_256((__v8sf)(__m256)(A), (int)(imm)) #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ @@ -7717,12 +7712,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) (__v4sf)_mm_setzero_ps()) #define _mm256_extracti32x4_epi32(A, imm) \ - (__m128i)__builtin_shufflevector((__v8si)(__m256)(A), \ - (__v8si)_mm256_undefined_si256(), \ - ((imm) & 1) ? 4 : 0, \ - ((imm) & 1) ? 5 : 1, \ - ((imm) & 1) ? 6 : 2, \ - ((imm) & 1) ? 7 : 3) + (__m128i)__builtin_ia32_extracti32x4_256((__v8si)(__m256i)(A), (int)(imm)) #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ @@ -7735,16 +7725,8 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) (__v4si)_mm_setzero_si128()) #define _mm256_insertf32x4(A, B, imm) \ - (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \ - (__v8sf)_mm256_castps128_ps256((__m128)(B)), \ - ((imm) & 0x1) ? 0 : 8, \ - ((imm) & 0x1) ? 1 : 9, \ - ((imm) & 0x1) ? 2 : 10, \ - ((imm) & 0x1) ? 3 : 11, \ - ((imm) & 0x1) ? 8 : 4, \ - ((imm) & 0x1) ? 9 : 5, \ - ((imm) & 0x1) ? 10 : 6, \ - ((imm) & 0x1) ? 11 : 7) + (__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \ + (__v4sf)(__m128)(B), (int)(imm)) #define _mm256_mask_insertf32x4(W, U, A, B, imm) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ @@ -7757,16 +7739,8 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) (__v8sf)_mm256_setzero_ps()) #define _mm256_inserti32x4(A, B, imm) \ - (__m256i)__builtin_shufflevector((__v8si)(__m256i)(A), \ - (__v8si)_mm256_castsi128_si256((__m128i)(B)), \ - ((imm) & 0x1) ? 0 : 8, \ - ((imm) & 0x1) ? 1 : 9, \ - ((imm) & 0x1) ? 2 : 10, \ - ((imm) & 0x1) ? 3 : 11, \ - ((imm) & 0x1) ? 8 : 4, \ - ((imm) & 0x1) ? 9 : 5, \ - ((imm) & 0x1) ? 10 : 6, \ - ((imm) & 0x1) ? 11 : 7) + (__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \ + (__v4si)(__m128i)(B), (int)(imm)) #define _mm256_mask_inserti32x4(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 7c85893ba13..6c42132cf44 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -4613,17 +4613,8 @@ _mm256_zextsi128_si256(__m128i __a) /// result. /// \returns A 256-bit vector of [8 x float] containing the interleaved values. #define _mm256_insertf128_ps(V1, V2, M) \ - (__m256)__builtin_shufflevector( \ - (__v8sf)(__m256)(V1), \ - (__v8sf)_mm256_castps128_ps256((__m128)(V2)), \ - (((M) & 1) ? 0 : 8), \ - (((M) & 1) ? 1 : 9), \ - (((M) & 1) ? 2 : 10), \ - (((M) & 1) ? 3 : 11), \ - (((M) & 1) ? 8 : 4), \ - (((M) & 1) ? 9 : 5), \ - (((M) & 1) ? 10 : 6), \ - (((M) & 1) ? 11 : 7) ) + (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V1), \ + (__v4sf)(__m128)(V2), (int)(M)) /// Constructs a new 256-bit vector of [4 x double] by first duplicating /// a 256-bit vector of [4 x double] given in the first parameter, and then @@ -4660,13 +4651,8 @@ _mm256_zextsi128_si256(__m128i __a) /// result. /// \returns A 256-bit vector of [4 x double] containing the interleaved values. #define _mm256_insertf128_pd(V1, V2, M) \ - (__m256d)__builtin_shufflevector( \ - (__v4df)(__m256d)(V1), \ - (__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \ - (((M) & 1) ? 0 : 4), \ - (((M) & 1) ? 1 : 5), \ - (((M) & 1) ? 4 : 2), \ - (((M) & 1) ? 5 : 3) ) + (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)(__m256d)(V1), \ + (__v2df)(__m128d)(V2), (int)(M)) /// Constructs a new 256-bit integer vector by first duplicating a /// 256-bit integer vector given in the first parameter, and then replacing @@ -4703,13 +4689,8 @@ _mm256_zextsi128_si256(__m128i __a) /// result. /// \returns A 256-bit integer vector containing the interleaved values. #define _mm256_insertf128_si256(V1, V2, M) \ - (__m256i)__builtin_shufflevector( \ - (__v4di)(__m256i)(V1), \ - (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \ - (((M) & 1) ? 0 : 4), \ - (((M) & 1) ? 1 : 5), \ - (((M) & 1) ? 4 : 2), \ - (((M) & 1) ? 5 : 3) ) + (__m256i)__builtin_ia32_vinsertf128_si256((__v4di)(__m256i)(V1), \ + (__v2di)(__m128i)(V2), (int)(M)) /* Vector extract. @@ -4738,13 +4719,7 @@ _mm256_zextsi128_si256(__m128i __a) /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result. /// \returns A 128-bit vector of [4 x float] containing the extracted bits. #define _mm256_extractf128_ps(V, M) \ - (__m128)__builtin_shufflevector( \ - (__v8sf)(__m256)(V), \ - (__v8sf)(_mm256_undefined_ps()), \ - (((M) & 1) ? 4 : 0), \ - (((M) & 1) ? 5 : 1), \ - (((M) & 1) ? 6 : 2), \ - (((M) & 1) ? 7 : 3) ) + (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M)) /// Extracts either the upper or the lower 128 bits from a 256-bit vector /// of [4 x double], as determined by the immediate integer parameter, and @@ -4768,11 +4743,7 @@ _mm256_zextsi128_si256(__m128i __a) /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result. /// \returns A 128-bit vector of [2 x double] containing the extracted bits. #define _mm256_extractf128_pd(V, M) \ - (__m128d)__builtin_shufflevector( \ - (__v4df)(__m256d)(V), \ - (__v4df)(_mm256_undefined_pd()), \ - (((M) & 1) ? 2 : 0), \ - (((M) & 1) ? 3 : 1) ) + (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M)) /// Extracts either the upper or the lower 128 bits from a 256-bit /// integer vector, as determined by the immediate integer parameter, and @@ -4796,11 +4767,7 @@ _mm256_zextsi128_si256(__m128i __a) /// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result. /// \returns A 128-bit integer vector containing the extracted bits. #define _mm256_extractf128_si256(V, M) \ - (__m128i)__builtin_shufflevector( \ - (__v4di)(__m256i)(V), \ - (__v4di)(_mm256_undefined_si256()), \ - (((M) & 1) ? 2 : 0), \ - (((M) & 1) ? 3 : 1) ) + (__m128i)__builtin_ia32_vextractf128_si256((__v4di)(__m256i)(V), (int)(M)) /* SIMD load ops (unaligned) */ /// Loads two 128-bit floating-point vectors of [4 x float] from diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index d16921c6c70..4f5405935e2 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2607,9 +2607,33 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return false; case X86::BI__builtin_ia32_vec_ext_v2si: case X86::BI__builtin_ia32_vec_ext_v2di: + case X86::BI__builtin_ia32_vextractf128_pd256: + case X86::BI__builtin_ia32_vextractf128_ps256: + case X86::BI__builtin_ia32_vextractf128_si256: + case X86::BI__builtin_ia32_extract128i256: + case X86::BI__builtin_ia32_extractf64x4: + case X86::BI__builtin_ia32_extracti64x4: + case X86::BI__builtin_ia32_extractf32x8: + case X86::BI__builtin_ia32_extracti32x8: + case X86::BI__builtin_ia32_extractf64x2_256: + case X86::BI__builtin_ia32_extracti64x2_256: + case X86::BI__builtin_ia32_extractf32x4_256: + case X86::BI__builtin_ia32_extracti32x4_256: i = 1; l = 0; u = 1; break; case X86::BI__builtin_ia32_vec_set_v2di: + case X86::BI__builtin_ia32_vinsertf128_pd256: + case X86::BI__builtin_ia32_vinsertf128_ps256: + case X86::BI__builtin_ia32_vinsertf128_si256: + case X86::BI__builtin_ia32_insert128i256: + case X86::BI__builtin_ia32_insertf32x8: + case X86::BI__builtin_ia32_inserti32x8: + case X86::BI__builtin_ia32_insertf64x4: + case X86::BI__builtin_ia32_inserti64x4: + case X86::BI__builtin_ia32_insertf64x2_256: + case X86::BI__builtin_ia32_inserti64x2_256: + case X86::BI__builtin_ia32_insertf32x4_256: + case X86::BI__builtin_ia32_inserti32x4_256: i = 2; l = 0; u = 1; break; case X86::BI__builtin_ia32_vpermilpd: @@ -2617,6 +2641,10 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_vec_ext_v4si: case X86::BI__builtin_ia32_vec_ext_v4sf: case X86::BI__builtin_ia32_vec_ext_v4di: + case X86::BI__builtin_ia32_extractf32x4: + case X86::BI__builtin_ia32_extracti32x4: + case X86::BI__builtin_ia32_extractf64x2_512: + case X86::BI__builtin_ia32_extracti64x2_512: i = 1; l = 0; u = 3; break; case X86::BI_mm_prefetch: @@ -2633,6 +2661,10 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_shuf_f64x2_256: case X86::BI__builtin_ia32_shuf_i32x4_256: case X86::BI__builtin_ia32_shuf_i64x2_256: + case X86::BI__builtin_ia32_insertf64x2_512: + case X86::BI__builtin_ia32_inserti64x2_512: + case X86::BI__builtin_ia32_insertf32x4: + case X86::BI__builtin_ia32_inserti32x4: i = 2; l = 0; u = 3; break; case X86::BI__builtin_ia32_vpermil2pd: diff --git a/clang/test/CodeGen/avx-builtins.c b/clang/test/CodeGen/avx-builtins.c index cb4b103f46f..5743e36c1de 100644 --- a/clang/test/CodeGen/avx-builtins.c +++ b/clang/test/CodeGen/avx-builtins.c @@ -342,19 +342,19 @@ long long test_mm256_extract_epi64(__m256i A) { __m128d test_mm256_extractf128_pd(__m256d A) { // CHECK-LABEL: test_mm256_extractf128_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> zeroinitializer, <2 x i32> <i32 2, i32 3> + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3> return _mm256_extractf128_pd(A, 1); } __m128 test_mm256_extractf128_ps(__m256 A) { // CHECK-LABEL: test_mm256_extractf128_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> return _mm256_extractf128_ps(A, 1); } __m128i test_mm256_extractf128_si256(__m256i A) { // CHECK-LABEL: test_mm256_extractf128_si256 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <2 x i32> <i32 2, i32 3> + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> return _mm256_extractf128_si256(A, 1); } @@ -420,22 +420,22 @@ __m256i test_mm256_insert_epi64(__m256i x, long long b) { __m256d test_mm256_insertf128_pd(__m256d A, __m128d B) { // CHECK-LABEL: test_mm256_insertf128_pd - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3> return _mm256_insertf128_pd(A, B, 0); } __m256 test_mm256_insertf128_ps(__m256 A, __m128 B) { // CHECK-LABEL: test_mm256_insertf128_ps - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> return _mm256_insertf128_ps(A, B, 1); } __m256i test_mm256_insertf128_si256(__m256i A, __m128i B) { // CHECK-LABEL: test_mm256_insertf128_si256 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3> + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7> return _mm256_insertf128_si256(A, B, 0); } @@ -486,7 +486,7 @@ __m256 test_mm256_loadu2_m128(float* A, float* B) { // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 1{{$}} // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 1{{$}} - // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> return _mm256_loadu2_m128(A, B); } @@ -496,7 +496,7 @@ __m256d test_mm256_loadu2_m128d(double* A, double* B) { // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}} // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}} - // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5> return _mm256_loadu2_m128d(A, B); } @@ -506,8 +506,8 @@ __m256i test_mm256_loadu2_m128i(__m128i* A, __m128i* B) { // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}} // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}} - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> return _mm256_loadu2_m128i(A, B); } @@ -1169,7 +1169,7 @@ void test_mm256_storeu2_m128(float* A, float* B, __m256 C) { // CHECK-LABEL: test_mm256_storeu2_m128 // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}} - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}} _mm256_storeu2_m128(A, B, C); } @@ -1178,7 +1178,7 @@ void test_mm256_storeu2_m128d(double* A, double* B, __m256d C) { // CHECK-LABEL: test_mm256_storeu2_m128d // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <2 x i32> <i32 0, i32 1> // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 1{{$}} - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> zeroinitializer, <2 x i32> <i32 2, i32 3> + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3> // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 1{{$}} _mm256_storeu2_m128d(A, B, C); } @@ -1187,7 +1187,7 @@ void test_mm256_storeu2_m128i(__m128i* A, __m128i* B, __m256i C) { // CHECK-LABEL: test_mm256_storeu2_m128i // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i32> <i32 0, i32 1> // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <2 x i32> <i32 2, i32 3> + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}} _mm256_storeu2_m128i(A, B, C); } diff --git a/clang/test/CodeGen/avx-shuffle-builtins.c b/clang/test/CodeGen/avx-shuffle-builtins.c index b5bac89bd1e..08300159613 100644 --- a/clang/test/CodeGen/avx-shuffle-builtins.c +++ b/clang/test/CodeGen/avx-shuffle-builtins.c @@ -103,7 +103,7 @@ __m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) { __m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) { // CHECK-LABEL: @test_mm256_insertf128_si256_0 - // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 2, i32 3> + // CHECK: shufflevector{{.*}}<i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7> return _mm256_insertf128_si256(a, b, 0); } @@ -121,7 +121,7 @@ __m256d test_mm256_insertf128_pd_1(__m256d a, __m128d b) { __m256i test_mm256_insertf128_si256_1(__m256i a, __m128i b) { // CHECK-LABEL: @test_mm256_insertf128_si256_1 - // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5> + // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> return _mm256_insertf128_si256(a, b, 1); } @@ -141,7 +141,7 @@ __m128d test_mm256_extractf128_pd_0(__m256d a) { __m128i test_mm256_extractf128_si256_0(__m256i a) { // CHECK-LABEL: @test_mm256_extractf128_si256_0 - // CHECK: shufflevector{{.*}}<i32 0, i32 1> + // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3> return _mm256_extractf128_si256(a, 0); } @@ -159,7 +159,7 @@ __m128d test_mm256_extractf128_pd_1(__m256d a) { __m128i test_mm256_extractf128_si256_1(__m256i a) { // CHECK-LABEL: @test_mm256_extractf128_si256_1 - // CHECK: shufflevector{{.*}}<i32 2, i32 3> + // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 6, i32 7> return _mm256_extractf128_si256(a, 1); } diff --git a/clang/test/CodeGen/avx2-builtins.c b/clang/test/CodeGen/avx2-builtins.c index bd7e3a95968..72b4349f6e8 100644 --- a/clang/test/CodeGen/avx2-builtins.c +++ b/clang/test/CodeGen/avx2-builtins.c @@ -386,21 +386,21 @@ __m256i test_mm256_cvtepu32_epi64(__m128i a) { __m128i test0_mm256_extracti128_si256_0(__m256i a) { // CHECK-LABEL: test0_mm256_extracti128_si256 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <2 x i32> <i32 0, i32 1> + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 0, i32 1> return _mm256_extracti128_si256(a, 0); } __m128i test1_mm256_extracti128_si256_1(__m256i a) { // CHECK-LABEL: test1_mm256_extracti128_si256 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <2 x i32> <i32 2, i32 3> + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3> return _mm256_extracti128_si256(a, 1); } // Immediate should be truncated to one bit. __m128i test2_mm256_extracti128_si256(__m256i a) { // CHECK-LABEL: test2_mm256_extracti128_si256 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <2 x i32> <i32 0, i32 1> - return _mm256_extracti128_si256(a, 2); + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 0, i32 1> + return _mm256_extracti128_si256(a, 0); } __m256i test_mm256_hadd_epi16(__m256i a, __m256i b) { @@ -657,14 +657,14 @@ __m128 test_mm256_mask_i64gather_ps(__m128 a, float const *b, __m256i c, __m128 __m256i test0_mm256_inserti128_si256(__m256i a, __m128i b) { // CHECK-LABEL: test0_mm256_inserti128_si256 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3> return _mm256_inserti128_si256(a, b, 0); } __m256i test1_mm256_inserti128_si256(__m256i a, __m128i b) { // CHECK-LABEL: test1_mm256_inserti128_si256 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5> return _mm256_inserti128_si256(a, b, 1); } @@ -672,9 +672,9 @@ __m256i test1_mm256_inserti128_si256(__m256i a, __m128i b) { // Immediate should be truncated to one bit. __m256i test2_mm256_inserti128_si256(__m256i a, __m128i b) { // CHECK-LABEL: test2_mm256_inserti128_si256 - // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3> - return _mm256_inserti128_si256(a, b, 2); + return _mm256_inserti128_si256(a, b, 0); } __m256i test_mm256_madd_epi16(__m256i a, __m256i b) { diff --git a/clang/test/CodeGen/avx512dq-builtins.c b/clang/test/CodeGen/avx512dq-builtins.c index 756ecf3e6ce..f5c4b2633bd 100644 --- a/clang/test/CodeGen/avx512dq-builtins.c +++ b/clang/test/CodeGen/avx512dq-builtins.c @@ -1075,80 +1075,80 @@ __m512i test_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i const* __A) { __m256 test_mm512_extractf32x8_ps(__m512 __A) { // CHECK-LABEL: @test_mm512_extractf32x8_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> return _mm512_extractf32x8_ps(__A, 1); } __m256 test_mm512_mask_extractf32x8_ps(__m256 __W, __mmask8 __U, __m512 __A) { // CHECK-LABEL: @test_mm512_mask_extractf32x8_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm512_mask_extractf32x8_ps(__W, __U, __A, 1); } __m256 test_mm512_maskz_extractf32x8_ps(__mmask8 __U, __m512 __A) { // CHECK-LABEL: @test_mm512_maskz_extractf32x8_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm512_maskz_extractf32x8_ps(__U, __A, 1); } __m128d test_mm512_extractf64x2_pd(__m512d __A) { // CHECK-LABEL: @test_mm512_extractf64x2_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> zeroinitializer, <2 x i32> <i32 6, i32 7> + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> <i32 6, i32 7> return _mm512_extractf64x2_pd(__A, 3); } __m128d test_mm512_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m512d __A) { // CHECK-LABEL: @test_mm512_mask_extractf64x2_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> zeroinitializer, <2 x i32> <i32 6, i32 7> + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> <i32 6, i32 7> // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm512_mask_extractf64x2_pd(__W, __U, __A, 3); } __m128d test_mm512_maskz_extractf64x2_pd(__mmask8 __U, __m512d __A) { // CHECK-LABEL: @test_mm512_maskz_extractf64x2_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> zeroinitializer, <2 x i32> <i32 6, i32 7> + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> <i32 6, i32 7> // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm512_maskz_extractf64x2_pd(__U, __A, 3); } __m256i test_mm512_extracti32x8_epi32(__m512i __A) { // CHECK-LABEL: @test_mm512_extracti32x8_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> return _mm512_extracti32x8_epi32(__A, 1); } __m256i test_mm512_mask_extracti32x8_epi32(__m256i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_extracti32x8_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm512_mask_extracti32x8_epi32(__W, __U, __A, 1); } __m256i test_mm512_maskz_extracti32x8_epi32(__mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_extracti32x8_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm512_maskz_extracti32x8_epi32(__U, __A, 1); } __m128i test_mm512_extracti64x2_epi64(__m512i __A) { // CHECK-LABEL: @test_mm512_extracti64x2_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> zeroinitializer, <2 x i32> <i32 6, i32 7> + // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7> return _mm512_extracti64x2_epi64(__A, 3); } __m128i test_mm512_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_extracti64x2_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> zeroinitializer, <2 x i32> <i32 6, i32 7> + // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7> // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm512_mask_extracti64x2_epi64(__W, __U, __A, 3); } __m128i test_mm512_maskz_extracti64x2_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_extracti64x2_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> zeroinitializer, <2 x i32> <i32 6, i32 7> + // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7> // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm512_maskz_extracti64x2_epi64(__U, __A, 3); } diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c index 0ed5b7b9527..52554ba183e 100644 --- a/clang/test/CodeGen/avx512f-builtins.c +++ b/clang/test/CodeGen/avx512f-builtins.c @@ -1508,20 +1508,20 @@ __mmask8 test_mm512_mask_cmpunord_ps_mask(__mmask8 k, __m512 a, __m512 b) { __m256d test_mm512_extractf64x4_pd(__m512d a) { // CHECK-LABEL: @test_mm512_extractf64x4_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> return _mm512_extractf64x4_pd(a, 1); } __m256d test_mm512_mask_extractf64x4_pd(__m256d __W,__mmask8 __U,__m512d __A){ // CHECK-LABEL:@test_mm512_mask_extractf64x4_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm512_mask_extractf64x4_pd( __W, __U, __A, 1); } __m256d test_mm512_maskz_extractf64x4_pd(__mmask8 __U,__m512d __A){ // CHECK-LABEL:@test_mm512_maskz_extractf64x4_pd - // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm512_maskz_extractf64x4_pd( __U, __A, 1); } @@ -1529,20 +1529,20 @@ __m256d test_mm512_maskz_extractf64x4_pd(__mmask8 __U,__m512d __A){ __m128 test_mm512_extractf32x4_ps(__m512 a) { // CHECK-LABEL: @test_mm512_extractf32x4_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> return _mm512_extractf32x4_ps(a, 1); } __m128 test_mm512_mask_extractf32x4_ps(__m128 __W, __mmask8 __U,__m512d __A){ // CHECK-LABEL:@test_mm512_mask_extractf32x4_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm512_mask_extractf32x4_ps( __W, __U, __A, 1); } __m128 test_mm512_maskz_extractf32x4_ps( __mmask8 __U,__m512d __A){ // CHECK-LABEL:@test_mm512_maskz_extractf32x4_ps - // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm512_maskz_extractf32x4_ps( __U, __A, 1); } @@ -5514,40 +5514,40 @@ void test_mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A) __m128i test_mm512_extracti32x4_epi32(__m512i __A) { // CHECK-LABEL: @test_mm512_extracti32x4_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> <i32 12, i32 13, i32 14, i32 15> + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15> return _mm512_extracti32x4_epi32(__A, 3); } __m128i test_mm512_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_extracti32x4_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> <i32 12, i32 13, i32 14, i32 15> + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm512_mask_extracti32x4_epi32(__W, __U, __A, 3); } __m128i test_mm512_maskz_extracti32x4_epi32(__mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_extracti32x4_epi32 - // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> <i32 12, i32 13, i32 14, i32 15> + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm512_maskz_extracti32x4_epi32(__U, __A, 3); } __m256i test_mm512_extracti64x4_epi64(__m512i __A) { // CHECK-LABEL: @test_mm512_extracti64x4_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> return _mm512_extracti64x4_epi64(__A, 1); } __m256i test_mm512_mask_extracti64x4_epi64(__m256i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_extracti64x4_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm512_mask_extracti64x4_epi64(__W, __U, __A, 1); } __m256i test_mm512_maskz_extracti64x4_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_extracti64x4_epi64 - // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm512_maskz_extracti64x4_epi64(__U, __A, 1); } diff --git a/clang/test/CodeGen/avx512vl-builtins.c b/clang/test/CodeGen/avx512vl-builtins.c index 27dca971d8f..1edc25d8494 100644 --- a/clang/test/CodeGen/avx512vl-builtins.c +++ b/clang/test/CodeGen/avx512vl-builtins.c @@ -7101,40 +7101,40 @@ void test_mm256_mask_cvtepi64_storeu_epi16(void * __P, __mmask8 __M, __m256i __A __m128 test_mm256_extractf32x4_ps(__m256 __A) { // CHECK-LABEL: @test_mm256_extractf32x4_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> return _mm256_extractf32x4_ps(__A, 1); } __m128 test_mm256_mask_extractf32x4_ps(__m128 __W, __mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm256_mask_extractf32x4_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm256_mask_extractf32x4_ps(__W, __U, __A, 1); } __m128 test_mm256_maskz_extractf32x4_ps(__mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm256_maskz_extractf32x4_ps - // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm256_maskz_extractf32x4_ps(__U, __A, 1); } __m128i test_mm256_extracti32x4_epi32(__m256i __A) { // CHECK-LABEL: @test_mm256_extracti32x4_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> return _mm256_extracti32x4_epi32(__A, 1); } __m128i test_mm256_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_extracti32x4_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm256_mask_extracti32x4_epi32(__W, __U, __A, 1); } __m128i test_mm256_maskz_extracti32x4_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_extracti32x4_epi32 - // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm256_maskz_extracti32x4_epi32(__U, __A, 1); } diff --git a/clang/test/CodeGen/avx512vldq-builtins.c b/clang/test/CodeGen/avx512vldq-builtins.c index ea9db6ecc8a..31ae623fee3 100644 --- a/clang/test/CodeGen/avx512vldq-builtins.c +++ b/clang/test/CodeGen/avx512vldq-builtins.c @@ -1024,40 +1024,40 @@ __m256i test_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i const* __A) { __m128d test_mm256_extractf64x2_pd(__m256d __A) { // CHECK-LABEL: @test_mm256_extractf64x2_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> zeroinitializer, <2 x i32> <i32 2, i32 3> + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3> return _mm256_extractf64x2_pd(__A, 1); } __m128d test_mm256_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m256d __A) { // CHECK-LABEL: @test_mm256_mask_extractf64x2_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> zeroinitializer, <2 x i32> <i32 2, i32 3> + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3> // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm256_mask_extractf64x2_pd(__W, __U, __A, 1); } __m128d test_mm256_maskz_extractf64x2_pd(__mmask8 __U, __m256d __A) { // CHECK-LABEL: @test_mm256_maskz_extractf64x2_pd - // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> zeroinitializer, <2 x i32> <i32 2, i32 3> + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3> // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm256_maskz_extractf64x2_pd(__U, __A, 1); } __m128i test_mm256_extracti64x2_epi64(__m256i __A) { // CHECK-LABEL: @test_mm256_extracti64x2_epi64 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <2 x i32> <i32 2, i32 3> + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3> return _mm256_extracti64x2_epi64(__A, 1); } __m128i test_mm256_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_extracti64x2_epi64 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <2 x i32> <i32 2, i32 3> + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3> // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm256_mask_extracti64x2_epi64(__W, __U, __A, 1); } __m128i test_mm256_maskz_extracti64x2_epi64(__mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_extracti64x2_epi64 - // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> zeroinitializer, <2 x i32> <i32 2, i32 3> + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3> // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm256_maskz_extracti64x2_epi64(__U, __A, 1); } |