diff options
| author | Asaf Badouh <asaf.badouh@intel.com> | 2015-09-02 14:21:54 +0000 |
|---|---|---|
| committer | Asaf Badouh <asaf.badouh@intel.com> | 2015-09-02 14:21:54 +0000 |
| commit | d2c3599c5f777f372087cde87510c65e3dc91b1e (patch) | |
| tree | 17662b80ee7243601831366e1d70cf362e354f3c /llvm/test/CodeGen | |
| parent | b684e381c9c8df091b255d010bee6e3ae53f9e85 (diff) | |
| download | bcm5719-llvm-d2c3599c5f777f372087cde87510c65e3dc91b1e.tar.gz bcm5719-llvm-d2c3599c5f777f372087cde87510c65e3dc91b1e.zip | |
[X86][AVX512VLBW] Add support for byte shift and SAD
Add byte shift left/right.
Add SAD, which computes the sum of absolute differences.
Differential Revision: http://reviews.llvm.org/D12479
llvm-svn: 246654
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-isa-check.ll | 7 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bw-intrinsics.ll | 38 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll | 1 |
3 files changed, 45 insertions, 1 deletions
diff --git a/llvm/test/CodeGen/X86/avx-isa-check.ll b/llvm/test/CodeGen/X86/avx-isa-check.ll index d551e2331d4..4d8db7df8b0 100644 --- a/llvm/test/CodeGen/X86/avx-isa-check.ll +++ b/llvm/test/CodeGen/X86/avx-isa-check.ll @@ -261,4 +261,9 @@ define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) { %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1> %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double> ret <2 x double> %bitcast64 -}
\ No newline at end of file +} + +define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) { + %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24> + ret <16 x i16> %shuffle +} diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll index ab724da6927..6376657cf16 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1221,3 +1221,41 @@ define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %res4 = add <32 x i16> %res3, %res2 ret <32 x i16> %res4 } + +declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32) + +; CHECK-LABEL: @test_int_x86_avx512_mask_psll_dq_512 +; CHECK-NOT: call +; CHECK: vpslldq +; CHECK: vpslldq +define <8 x i64>@test_int_x86_avx512_mask_psll_dq_512(<8 x i64> %x0) { + %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8) + %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4) + %res2 = add <8 x i64> %res, %res1 + ret <8 x i64> %res2 +} + +declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32) + +; CHECK-LABEL: @test_int_x86_avx512_mask_psrl_dq_512 +; CHECK-NOT: call +; CHECK: vpsrldq +; CHECK: vpsrldq +define <8 x i64>@test_int_x86_avx512_mask_psrl_dq_512(<8 x i64> %x0) { + %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8) + %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4) + %res2 = add <8 x i64> %res, %res1 + ret <8 x i64> %res2 +} +declare <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>) + +; CHECK-LABEL: @test_int_x86_avx512_mask_psadb_w_512 +; CHECK-NOT: call +; CHECK: vpsadbw %zmm1 +; CHECK: vpsadbw %zmm2 +define <64 x i8>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){ + %res = call <64 
x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1) + %res1 = call <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2) + %res2 = add <64 x i8> %res, %res1 + ret <64 x i8> %res2 +} diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 3397c770d42..bb8a1f51cb0 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -4275,3 +4275,4 @@ define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %res4 = add <16 x i16> %res3, %res2 ret <16 x i16> %res4 } + |

