summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-07-04 11:06:15 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-07-04 11:06:15 +0000
commit: 17388f2569007ad264125ad4c28d1b36b2bb3dbf (patch)
tree: 47b8b09b5209d717b56df0322b7ac69cdedfc8b6
parent: 7cf00d191ab43c30063f2e6543d2e6fee87c64a2 (diff)
download: bcm5719-llvm-17388f2569007ad264125ad4c28d1b36b2bb3dbf.tar.gz
download: bcm5719-llvm-17388f2569007ad264125ad4c28d1b36b2bb3dbf.zip
[X86][AVX512] Converted the VPERMILPD/VPERMILPS intrinsics to generic IR
llvm-svn: 274492
-rw-r--r-- clang/lib/Headers/avx512fintrin.h | 58
-rw-r--r-- clang/test/CodeGen/avx512f-builtins.c | 28
2 files changed, 56 insertions, 30 deletions
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index b5c468a1ca4..ab8f3d1b41d 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -6540,34 +6540,56 @@ _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
}
#define _mm512_permute_pd(X, C) __extension__ ({ \
- (__m512d)__builtin_ia32_vpermilpd512_mask((__v8df)(__m512d)(X), (int)(C), \
- (__v8df)_mm512_undefined_pd(), \
- (__mmask8)-1); })
+ (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
+ (__v8df)_mm512_setzero_pd(), \
+ 0 + (((C) & 0x01) >> 0), \
+ 0 + (((C) & 0x02) >> 1), \
+ 2 + (((C) & 0x04) >> 2), \
+ 2 + (((C) & 0x08) >> 3), \
+ 4 + (((C) & 0x10) >> 4), \
+ 4 + (((C) & 0x20) >> 5), \
+ 6 + (((C) & 0x40) >> 6), \
+ 6 + (((C) & 0x80) >> 7)); })
#define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \
- (__m512d)__builtin_ia32_vpermilpd512_mask((__v8df)(__m512d)(X), (int)(C), \
- (__v8df)(__m512d)(W), \
- (__mmask8)(U)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_permute_pd((X), (C)), \
+ (__v8df)(__m512d)(W)); })
#define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \
- (__m512d)__builtin_ia32_vpermilpd512_mask((__v8df)(__m512d)(X), (int)(C), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_permute_pd((X), (C)), \
+ (__v8df)_mm512_setzero_pd()); })
#define _mm512_permute_ps(X, C) __extension__ ({ \
- (__m512)__builtin_ia32_vpermilps512_mask((__v16sf)(__m512)(X), (int)(C), \
- (__v16sf)_mm512_undefined_ps(), \
- (__mmask16)-1); })
+ (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \
+ (__v16sf)_mm512_setzero_ps(), \
+ 0 + (((C) & 0x03) >> 0), \
+ 0 + (((C) & 0x0c) >> 2), \
+ 0 + (((C) & 0x30) >> 4), \
+ 0 + (((C) & 0xc0) >> 6), \
+ 4 + (((C) & 0x03) >> 0), \
+ 4 + (((C) & 0x0c) >> 2), \
+ 4 + (((C) & 0x30) >> 4), \
+ 4 + (((C) & 0xc0) >> 6), \
+ 8 + (((C) & 0x03) >> 0), \
+ 8 + (((C) & 0x0c) >> 2), \
+ 8 + (((C) & 0x30) >> 4), \
+ 8 + (((C) & 0xc0) >> 6), \
+ 12 + (((C) & 0x03) >> 0), \
+ 12 + (((C) & 0x0c) >> 2), \
+ 12 + (((C) & 0x30) >> 4), \
+ 12 + (((C) & 0xc0) >> 6)); })
#define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \
- (__m512)__builtin_ia32_vpermilps512_mask((__v16sf)(__m512)(X), (int)(C), \
- (__v16sf)(__m512)(W), \
- (__mmask16)(U)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_permute_ps((X), (C)), \
+ (__v16sf)(__m512)(W)); })
#define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \
- (__m512)__builtin_ia32_vpermilps512_mask((__v16sf)(__m512)(X), (int)(C), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_permute_ps((X), (C)), \
+ (__v16sf)_mm512_setzero_ps()); })
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_permutevar_pd (__m512d __A, __m512i __C)
diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c
index d638b03deeb..f57ba0bd3a5 100644
--- a/clang/test/CodeGen/avx512f-builtins.c
+++ b/clang/test/CodeGen/avx512f-builtins.c
@@ -3409,38 +3409,42 @@ __m512i test_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 _
__m512d test_mm512_permute_pd(__m512d __X) {
// CHECK-LABEL: @test_mm512_permute_pd
- // CHECK: @llvm.x86.avx512.mask.vpermil.pd.512
- return _mm512_permute_pd(__X, 2);
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+ return _mm512_permute_pd(__X, 2);
}
__m512d test_mm512_mask_permute_pd(__m512d __W, __mmask8 __U, __m512d __X) {
// CHECK-LABEL: @test_mm512_mask_permute_pd
- // CHECK: @llvm.x86.avx512.mask.vpermil.pd.512
- return _mm512_mask_permute_pd(__W, __U, __X, 2);
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ return _mm512_mask_permute_pd(__W, __U, __X, 2);
}
__m512d test_mm512_maskz_permute_pd(__mmask8 __U, __m512d __X) {
// CHECK-LABEL: @test_mm512_maskz_permute_pd
- // CHECK: @llvm.x86.avx512.mask.vpermil.pd.512
- return _mm512_maskz_permute_pd(__U, __X, 2);
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+ return _mm512_maskz_permute_pd(__U, __X, 2);
}
__m512 test_mm512_permute_ps(__m512 __X) {
// CHECK-LABEL: @test_mm512_permute_ps
- // CHECK: @llvm.x86.avx512.mask.vpermil.ps.512
- return _mm512_permute_ps(__X, 2);
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
+ return _mm512_permute_ps(__X, 2);
}
__m512 test_mm512_mask_permute_ps(__m512 __W, __mmask16 __U, __m512 __X) {
// CHECK-LABEL: @test_mm512_mask_permute_ps
- // CHECK: @llvm.x86.avx512.mask.vpermil.ps.512
- return _mm512_mask_permute_ps(__W, __U, __X, 2);
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ return _mm512_mask_permute_ps(__W, __U, __X, 2);
}
__m512 test_mm512_maskz_permute_ps(__mmask16 __U, __m512 __X) {
// CHECK-LABEL: @test_mm512_maskz_permute_ps
- // CHECK: @llvm.x86.avx512.mask.vpermil.ps.512
- return _mm512_maskz_permute_ps(__U, __X, 2);
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ return _mm512_maskz_permute_ps(__U, __X, 2);
}
__m512d test_mm512_permutevar_pd(__m512d __A, __m512i __C) {
OpenPOWER on IntegriCloud