author    Adam Nemet <anemet@apple.com>  2014-10-08 23:25:37 +0000
committer Adam Nemet <anemet@apple.com>  2014-10-08 23:25:37 +0000
commit    47b2d5f1e0dcd440302c7aebc3e448c2baf9f46b
tree      2c6be453466337e21d1ef7013c6b07835a4f3268
parent    2b5cdbb3de3c92faa2b63f33b8054060b5e363cd
[AVX512] Intrinsics for vextract*x4
This adds the Pat<>s for the intrinsics. These are necessary because we
don't lower these intrinsics to SDNodes but match them directly. See the
rationale in the previous commit.
llvm-svn: 219362
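
For illustration, here is a minimal IR sketch of the three call shapes the new Pat<>s distinguish (function names are hypothetical; the calls mirror the tests added in this commit): merge-masking, zero-masking via a zeroinitializer passthrough, and no masking via an all-ones (i8 -1) mask.

```llvm
; Merge-masking: lanes cleared in %mask keep their value from %passthru.
define <4 x float> @extract_merge(<16 x float> %a, <4 x float> %passthru, i8 %mask) {
  %r = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(
           <16 x float> %a, i8 2, <4 x float> %passthru, i8 %mask)
  ret <4 x float> %r        ; -> vextractf32x4 $2, ..., ... {%k1}   (x4rrk)
}

; Zero-masking: a zeroinitializer passthrough selects the {z} form.
define <4 x float> @extract_zero(<16 x float> %a, i8 %mask) {
  %r = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(
           <16 x float> %a, i8 2, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %r        ; -> vextractf32x4 $2, ..., ... {%k1} {z}   (x4rrkz)
}

; No masking: an all-ones mask selects the plain instruction.
define <4 x float> @extract_plain(<16 x float> %a) {
  %r = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(
           <16 x float> %a, i8 2, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %r        ; -> vextractf32x4 $2, ..., ...   (x4rr)
}

declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)
```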
 llvm/include/llvm/IR/IntrinsicsX86.td      | 17 +
 llvm/lib/Target/X86/X86InstrAVX512.td      | 23 +
 llvm/test/CodeGen/X86/avx512-intrinsics.ll | 36 +
 3 files changed, 76 insertions(+), 0 deletions(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 46e2aa23da8..87c7c870033 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -1791,6 +1791,23 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx2_vinserti128 : GCCBuiltin<"__builtin_ia32_insert128i256">,
       Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty,
                  llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vextractf32x4_512 :
+      GCCBuiltin<"__builtin_ia32_extractf32x4_mask">,
+      Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i8_ty,
+                 llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vextracti32x4_512 :
+      GCCBuiltin<"__builtin_ia32_extracti32x4_mask">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_v16i32_ty, llvm_i8_ty,
+                 llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vextractf64x4_512 :
+      GCCBuiltin<"__builtin_ia32_extractf64x4_mask">,
+      Intrinsic<[llvm_v4f64_ty], [llvm_v8f64_ty, llvm_i8_ty,
+                 llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vextracti64x4_512 :
+      GCCBuiltin<"__builtin_ia32_extracti64x4_mask">,
+      Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i8_ty,
+                 llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
 }
 
 // Conditional load ops
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 987ff6cab2c..7c209215aa3 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -449,6 +449,29 @@ multiclass vextract_for_size<int Opcode,
   def : Pat<(AltTo.VT (extract_subvector (AltFrom.VT VR512:$src), (iPTR 0))),
             (AltTo.VT (EXTRACT_SUBREG (AltFrom.VT VR512:$src),
                                       AltTo.SubRegIdx))>;
+
+  // Intrinsic call with masking.
+  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
+                              "x4_512")
+              VR512:$src1, (iPTR imm:$idx), To.RC:$src0, GR8:$mask),
+            (!cast<Instruction>(NAME # To.EltSize # "x4rrk") To.RC:$src0,
+              (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
+              VR512:$src1, imm:$idx)>;
+
+  // Intrinsic call with zero-masking.
+  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
+                              "x4_512")
+              VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, GR8:$mask),
+            (!cast<Instruction>(NAME # To.EltSize # "x4rrkz")
+              (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
+              VR512:$src1, imm:$idx)>;
+
+  // Intrinsic call without masking.
+  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
+                              "x4_512")
+              VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
+            (!cast<Instruction>(NAME # To.EltSize # "x4rr")
+              VR512:$src1, imm:$idx)>;
 }
 
 multiclass vextract_for_type<ValueType EltVT32, int Opcode32,
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index cbb819a112c..300b702764e 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -949,3 +949,39 @@ define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
 }
 
 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
+
+define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
+; CHECK-LABEL: test_mask_vextractf32x4:
+; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
+  %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask)
+  ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)
+
+define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
+; CHECK-LABEL: test_mask_vextracti64x4:
+; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
+  %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8)
+
+define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
+; CHECK-LABEL: test_maskz_vextracti32x4:
+; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
+  %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask)
+  ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8)
+
+define <4 x double> @test_vextractf64x4(<8 x double> %a) {
+; CHECK-LABEL: test_vextractf64x4:
+; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
+  %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1)
+  ret <4 x double> %res
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)

