summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2016-07-10 05:57:21 +0000
committerCraig Topper <craig.topper@gmail.com>2016-07-10 05:57:21 +0000
commit6e76fb61a745f30e3e57e907d82e9dbc4dc1669f (patch)
treed7ccbbc047d276c5295ff5a8dc8cb46f79c2f178
parent0b0954570a83fc47d72db86db529d4b5b70ab05c (diff)
downloadbcm5719-llvm-6e76fb61a745f30e3e57e907d82e9dbc4dc1669f.tar.gz
bcm5719-llvm-6e76fb61a745f30e3e57e907d82e9dbc4dc1669f.zip
[X86] Use __builtin_shufflevector for 512-bit shufps intrinsics.
llvm-svn: 275012
-rw-r--r--clang/lib/Headers/avx512fintrin.h44
-rw-r--r--clang/test/CodeGen/avx512f-builtins.c8
2 files changed, 33 insertions, 19 deletions
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index c85c793bf48..e70d7516888 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -7181,23 +7181,35 @@ _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
(__v8df)_mm512_shuffle_pd((A), (B), (M)), \
(__v8df)_mm512_setzero_pd()); })
-#define _mm512_shuffle_ps(M, V, imm) __extension__ ({ \
- (__m512)__builtin_ia32_shufps512_mask((__v16sf)(__m512)(M), \
- (__v16sf)(__m512)(V), (int)(imm), \
- (__v16sf)_mm512_undefined_ps(), \
- (__mmask16)-1); })
-
-#define _mm512_mask_shuffle_ps(W, U, M, V, imm) __extension__ ({ \
- (__m512)__builtin_ia32_shufps512_mask((__v16sf)(__m512)(M), \
- (__v16sf)(__m512)(V), (int)(imm), \
- (__v16sf)(__m512)(W), \
- (__mmask16)(U)); })
+#define _mm512_shuffle_ps(A, B, M) __extension__ ({ /* M must be an integer constant expression */ \
+ (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), /* 16 x float result: __m512, not __m512d */ \
+ (__v16sf)(__m512)(B), /* indices 0-15 pick from A, 16-31 pick from B */ \
+ 0 + (((M) >> 0) & 0x3), /* group 0 (elements 0-3): two picks from A[0..3] */ \
+ 0 + (((M) >> 2) & 0x3), \
+ 16 + (((M) >> 4) & 0x3), /* group 0: two picks from B[0..3] */ \
+ 16 + (((M) >> 6) & 0x3), \
+ 4 + (((M) >> 0) & 0x3), /* group 1 (elements 4-7): same 2-bit selectors, base offset 4 */ \
+ 4 + (((M) >> 2) & 0x3), \
+ 20 + (((M) >> 4) & 0x3), \
+ 20 + (((M) >> 6) & 0x3), \
+ 8 + (((M) >> 0) & 0x3), /* group 2 (elements 8-11): base offset 8 */ \
+ 8 + (((M) >> 2) & 0x3), \
+ 24 + (((M) >> 4) & 0x3), \
+ 24 + (((M) >> 6) & 0x3), \
+ 12 + (((M) >> 0) & 0x3), /* group 3 (elements 12-15): base offset 12 */ \
+ 12 + (((M) >> 2) & 0x3), \
+ 28 + (((M) >> 4) & 0x3), \
+ 28 + (((M) >> 6) & 0x3)); })
+
+#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
+ (__v16sf)(__m512)(W)); })
-#define _mm512_maskz_shuffle_ps(U, M, V, imm) __extension__ ({ \
- (__m512)__builtin_ia32_shufps512_mask((__v16sf)(__m512)(M), \
- (__v16sf)(__m512)(V), (int)(imm), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U)); })
+#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
+ (__v16sf)_mm512_setzero_ps()); })
#define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
(__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c
index b8121b2f75a..e872f4ccbb2 100644
--- a/clang/test/CodeGen/avx512f-builtins.c
+++ b/clang/test/CodeGen/avx512f-builtins.c
@@ -4234,19 +4234,21 @@ __m512d test_mm512_maskz_shuffle_pd(__mmask8 __U, __m512d __M, __m512d __V) {
__m512 test_mm512_shuffle_ps(__m512 __M, __m512 __V) {
// CHECK-LABEL: @test_mm512_shuffle_ps
- // CHECK: @llvm.x86.avx512.mask.shuf.ps.512
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
return _mm512_shuffle_ps(__M, __V, 4);
}
__m512 test_mm512_mask_shuffle_ps(__m512 __W, __mmask16 __U, __m512 __M, __m512 __V) {
// CHECK-LABEL: @test_mm512_mask_shuffle_ps
- // CHECK: @llvm.x86.avx512.mask.shuf.ps.512
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_shuffle_ps(__W, __U, __M, __V, 4);
}
__m512 test_mm512_maskz_shuffle_ps(__mmask16 __U, __m512 __M, __m512 __V) {
// CHECK-LABEL: @test_mm512_maskz_shuffle_ps
- // CHECK: @llvm.x86.avx512.mask.shuf.ps.512
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_shuffle_ps(__U, __M, __V, 4);
}
OpenPOWER on IntegriCloud