diff options
| -rw-r--r-- | clang/include/clang/Basic/BuiltinsX86.def | 3 | ||||
| -rw-r--r-- | clang/lib/Headers/avx512fintrin.h | 33 | ||||
| -rw-r--r-- | clang/lib/Headers/avx512vlintrin.h | 25 | ||||
| -rw-r--r-- | clang/lib/Sema/SemaChecking.cpp | 3 | ||||
| -rw-r--r-- | clang/test/CodeGen/avx512f-builtins.c | 10 | ||||
| -rw-r--r-- | clang/test/CodeGen/avx512vl-builtins.c | 12 |
6 files changed, 50 insertions, 36 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index d2da5512a18..e8537ee22bd 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -2177,9 +2177,6 @@ TARGET_BUILTIN(__builtin_ia32_movshdup128_mask, "V4fV4fV4fUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_movshdup256_mask, "V8fV8fV8fUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_movsldup128_mask, "V4fV4fV4fUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_movsldup256_mask, "V8fV8fV8fUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_pshufd512_mask, "V16iV16iIiV16iUs","","avx512f") -TARGET_BUILTIN(__builtin_ia32_pshufd256_mask, "V8iV8iIiV8iUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_pshufd128_mask, "V4iV4iIiV4iUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc","","avx512f") diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index b1f0ebf50f1..90b87b2586c 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -9052,19 +9052,34 @@ _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) } #define _mm512_shuffle_epi32(A, I) __extension__ ({ \ - (__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \ - (__v16si)_mm512_undefined_epi32(), \ - (__mmask16)-1); }) + (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ + (__v16si)_mm512_setzero_si512(), \ + 0 + (((I) & 0x03) >> 0), \ + 0 + (((I) & 0x0c) >> 2), \ + 0 + (((I) & 0x30) >> 4), \ + 0 + (((I) & 0xc0) >> 6), \ + 4 + (((I) & 0x03) >> 0), \ + 4 + (((I) & 0x0c) >> 2), \ + 4 + (((I) & 0x30) >> 4), \ + 4 + (((I) & 0xc0) >> 6), \ + 8 + (((I) & 0x03) >> 0), \ + 8 + (((I) & 0x0c) >> 2), \ + 8 + (((I) & 0x30) >> 4), \ + 8 + (((I) & 0xc0) >> 6), \ + 12 + (((I) & 0x03) >> 0), \ + 12 + (((I) & 0x0c) >> 2), \ + 12 + (((I) & 0x30) >> 4), \ + 12 + (((I) & 0xc0) >> 6)); }) #define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \ - (__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \ - (__v16si)(__m512i)(W), \ - (__mmask16)(U)); }) + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shuffle_epi32((A), (I)), \ + (__v16si)(__m512i)(W)); }) #define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \ - (__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U)); }) + (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ + (__v16si)_mm512_shuffle_epi32((A), (I)), \ + (__v16si)_mm512_setzero_si512()); }) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 62e5e863aea..830be8d1590 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -9257,25 +9257,24 @@ _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) } #define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\ - (__m256i)__builtin_ia32_pshufd256_mask((__v8si)(__m256i)(A), (int)(I), \ - (__v8si)(__m256i)(W), \ - (__mmask8)(U)); }) + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shuffle_epi32((A), (I)), \ + (__v8si)(__m256i)(W)); }) #define _mm256_maskz_shuffle_epi32(U, A, I) __extension__({\ - (__m256i)__builtin_ia32_pshufd256_mask((__v8si)(__m256i)(A), (int)(I), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U)); }) + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm256_shuffle_epi32((A), (I)), \ + (__v8si)_mm256_setzero_si256()); }) #define _mm_mask_shuffle_epi32(W, U, A, I) __extension__({\ - (__m128i)__builtin_ia32_pshufd128_mask((__v4si)(__m128i)(A), (int)(I), \ - (__v4si)(__m128i)(W), \ - (__mmask8)(U)); }) + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shuffle_epi32((A), (I)), \ + (__v4si)(__m128i)(W)); }) #define _mm_maskz_shuffle_epi32(U, A, I) __extension__({\ - (__m128i)__builtin_ia32_pshufd128_mask((__v4si)(__m128i)(A), (int)(I), \ - (__v4si)_mm_setzero_si128(), \ - (__mmask8)(U)); }) - + (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ + (__v4si)_mm_shuffle_epi32((A), (I)), \ + (__v4si)_mm_setzero_si128()); }) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 6f944e0d928..6bd910ed586 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1566,9 +1566,6 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_fpclasspd512_mask: case X86::BI__builtin_ia32_fpclasssd_mask: case X86::BI__builtin_ia32_fpclassss_mask: - case X86::BI__builtin_ia32_pshufd512_mask: - case X86::BI__builtin_ia32_pshufd256_mask: - case X86::BI__builtin_ia32_pshufd128_mask: i = 1; l = 0; u = 255; break; case X86::BI__builtin_ia32_palignr: diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c index 547edd0f641..01cbaf5d075 100644 --- a/clang/test/CodeGen/avx512f-builtins.c +++ b/clang/test/CodeGen/avx512f-builtins.c @@ -5956,19 +5956,21 @@ __m512 test_mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) { __m512i test_mm512_shuffle_epi32(__m512i __A) { // CHECK-LABEL: @test_mm512_shuffle_epi32 - // CHECK: @llvm.x86.avx512.mask.pshuf.d.512 + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12> return _mm512_shuffle_epi32(__A, 1); } __m512i test_mm512_mask_shuffle_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_shuffle_epi32 - // CHECK: @llvm.x86.avx512.mask.pshuf.d.512 + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shuffle_epi32(__W, __U, __A, 1); } __m512i test_mm512_maskz_shuffle_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_shuffle_epi32 - // CHECK: @llvm.x86.avx512.mask.pshuf.d.512 + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shuffle_epi32(__U, __A, 1); } @@ -7433,4 +7435,4 @@ __m512d test_mm512_setzero_pd() // CHECK-LABEL: @test_mm512_setzero_pd // CHECK: zeroinitializer return _mm512_setzero_pd(); -}
\ No newline at end of file +} diff --git a/clang/test/CodeGen/avx512vl-builtins.c b/clang/test/CodeGen/avx512vl-builtins.c index e6b13395f97..45556897489 100644 --- a/clang/test/CodeGen/avx512vl-builtins.c +++ b/clang/test/CodeGen/avx512vl-builtins.c @@ -6593,25 +6593,29 @@ __m256 test_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) { __m128i test_mm_mask_shuffle_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_shuffle_epi32 - // CHECK: @llvm.x86.avx512.mask.pshuf.d.128 + // CHECK: shufflevector <4 x i32> %2, <4 x i32> %4, <4 x i32> <i32 1, i32 0, i32 0, i32 0> + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shuffle_epi32(__W, __U, __A, 1); } __m128i test_mm_maskz_shuffle_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_shuffle_epi32 - // CHECK: @llvm.x86.avx512.mask.pshuf.d.128 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 0, i32 0, i32 0> + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shuffle_epi32(__U, __A, 2); } __m256i test_mm256_mask_shuffle_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_shuffle_epi32 - // CHECK: @llvm.x86.avx512.mask.pshuf.d.256 + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shuffle_epi32(__W, __U, __A, 2); } __m256i test_mm256_maskz_shuffle_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_shuffle_epi32 - // CHECK: @llvm.x86.avx512.mask.pshuf.d.256 + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shuffle_epi32(__U, __A, 2); } |

