author    Adam Nemet <anemet@apple.com>  2014-10-08 23:25:37 +0000
committer Adam Nemet <anemet@apple.com>  2014-10-08 23:25:37 +0000
commit    47b2d5f1e0dcd440302c7aebc3e448c2baf9f46b
tree      2c6be453466337e21d1ef7013c6b07835a4f3268
parent    2b5cdbb3de3c92faa2b63f33b8054060b5e363cd
[AVX512] Intrinsics for vextract*x4
This adds the Pat<>s for the intrinsics. These are necessary because we
don't lower these intrinsics to SDNodes but match them directly. See the
rationale in the previous commit.
llvm-svn: 219362
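
For illustration, here is a minimal IR sketch of the three call shapes the new Pat<>s distinguish (function names are hypothetical; the calls mirror the tests added in this commit): merge-masking, zero-masking via a zeroinitializer passthrough, and no masking via an all-ones (i8 -1) mask.

```llvm
; Merge-masking: lanes cleared in %mask keep their value from %passthru.
define <4 x float> @extract_merge(<16 x float> %a, <4 x float> %passthru, i8 %mask) {
  %r = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(
           <16 x float> %a, i8 2, <4 x float> %passthru, i8 %mask)
  ret <4 x float> %r        ; -> vextractf32x4 $2, ..., ... {%k1}   (x4rrk)
}

; Zero-masking: a zeroinitializer passthrough selects the {z} form.
define <4 x float> @extract_zero(<16 x float> %a, i8 %mask) {
  %r = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(
           <16 x float> %a, i8 2, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %r        ; -> vextractf32x4 $2, ..., ... {%k1} {z}   (x4rrkz)
}

; No masking: an all-ones mask selects the plain instruction.
define <4 x float> @extract_plain(<16 x float> %a) {
  %r = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(
           <16 x float> %a, i8 2, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %r        ; -> vextractf32x4 $2, ..., ...   (x4rr)
}

declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)
```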
 llvm/include/llvm/IR/IntrinsicsX86.td      | 17 +
 llvm/lib/Target/X86/X86InstrAVX512.td      | 23 +
 llvm/test/CodeGen/X86/avx512-intrinsics.ll | 36 +
 3 files changed, 76 insertions(+), 0 deletions(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 46e2aa23da8..87c7c870033 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -1791,6 +1791,23 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx2_vinserti128 : GCCBuiltin<"__builtin_ia32_insert128i256">,
       Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty,
                  llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vextractf32x4_512 :
+      GCCBuiltin<"__builtin_ia32_extractf32x4_mask">,
+      Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i8_ty,
+                 llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vextracti32x4_512 :
+      GCCBuiltin<"__builtin_ia32_extracti32x4_mask">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_v16i32_ty, llvm_i8_ty,
+                 llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vextractf64x4_512 :
+      GCCBuiltin<"__builtin_ia32_extractf64x4_mask">,
+      Intrinsic<[llvm_v4f64_ty], [llvm_v8f64_ty, llvm_i8_ty,
+                 llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vextracti64x4_512 :
+      GCCBuiltin<"__builtin_ia32_extracti64x4_mask">,
+      Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i8_ty,
+                 llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
 }
 
 // Conditional load ops
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 987ff6cab2c..7c209215aa3 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -449,6 +449,29 @@ multiclass vextract_for_size<int Opcode,
   def : Pat<(AltTo.VT (extract_subvector (AltFrom.VT VR512:$src), (iPTR 0))),
             (AltTo.VT (EXTRACT_SUBREG (AltFrom.VT VR512:$src),
                                       AltTo.SubRegIdx))>;
+
+  // Intrinsic call with masking.
+  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
+                              "x4_512")
+              VR512:$src1, (iPTR imm:$idx), To.RC:$src0, GR8:$mask),
+            (!cast<Instruction>(NAME # To.EltSize # "x4rrk") To.RC:$src0,
+              (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
+              VR512:$src1, imm:$idx)>;
+
+  // Intrinsic call with zero-masking.
+  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
+                              "x4_512")
+              VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, GR8:$mask),
+            (!cast<Instruction>(NAME # To.EltSize # "x4rrkz")
+              (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
+              VR512:$src1, imm:$idx)>;
+
+  // Intrinsic call without masking.
+  def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
+                              "x4_512")
+              VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
+            (!cast<Instruction>(NAME # To.EltSize # "x4rr")
+              VR512:$src1, imm:$idx)>;
 }
 
 multiclass vextract_for_type<ValueType EltVT32, int Opcode32,
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index cbb819a112c..300b702764e 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -949,3 +949,39 @@ define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
 }
 
 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
+
+define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
+; CHECK-LABEL: test_mask_vextractf32x4:
+; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
+  %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask)
+  ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)
+
+define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
+; CHECK-LABEL: test_mask_vextracti64x4:
+; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
+  %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8)
+
+define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
+; CHECK-LABEL: test_maskz_vextracti32x4:
+; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
+  %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask)
+  ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8)
+
+define <4 x double> @test_vextractf64x4(<8 x double> %a) {
+; CHECK-LABEL: test_vextractf64x4:
+; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
+  %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1)
+  ret <4 x double> %res
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)

