Enable AVX512_BF16 instructions, which are supported for BFLOAT16 in Cooper Lake

Summary: 1. Enable infrastructure of AVX512_BF16, which is supported for BFLOAT16 in Cooper Lake; 2. Enable VCVTNE2PS2BF16, VCVTNEPS2BF16 and DPBF16PS instructions, which are Vector Neural Network Instructions supporting BFLOAT16 inputs and conversion instructions from IEEE single precision. VCVTNE2PS2BF16: Convert Two Packed Single Data to One Packed BF16 Data. VCVTNEPS2BF16: Convert Packed Single Data to Packed BF16 Data. VDPBF16PS: Dot Product of BF16 Pairs Accumulated into Packed Single Precision. For more details about BF16 isa, please refer to the latest ISE document: https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference Author: LiuTianle Reviewers: craig.topper, smaslov, LuoYuanke, wxiao3, annita.zhang, RKSimon, spatel Reviewed By: craig.topper Subscribers: kristina, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D60550 llvm-svn: 360017
author: Luo, Yuanke <yuanke.luo@intel.com> 2019-05-06 08:22:37 +0000
committer: Luo, Yuanke <yuanke.luo@intel.com> 2019-05-06 08:22:37 +0000
commit: beec41c656e7d716fd5755cce12e4934fdced267 (patch)
tree: cbb53258bcd3f11adc2ee6a8467def1692b30623 /llvm/include
parent: fb607580046ed9fe2891151a23375f0c524d29b3 (diff)
download: bcm5719-llvm-beec41c656e7d716fd5755cce12e4934fdced267.tar.gz
bcm5719-llvm-beec41c656e7d716fd5755cce12e4934fdced267.zip
1 files changed, 38 insertions, 0 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 06b603a788b..2635e3d8648 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -4834,3 +4834,41 @@ let TargetPrefix = "x86" in {
   def int_x86_invpcid : GCCBuiltin<"__builtin_ia32_invpcid">,
               Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>;
 }
+
+let TargetPrefix = "x86" in {
+  def int_x86_avx512bf16_cvtne2ps2bf16_128:
+              GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_128">,
+              Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+              [IntrNoMem]>;
+  def int_x86_avx512bf16_cvtne2ps2bf16_256:
+              GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_256">,
+              Intrinsic<[llvm_v16i16_ty], [llvm_v8f32_ty, llvm_v8f32_ty],
+              [IntrNoMem]>;
+  def int_x86_avx512bf16_cvtne2ps2bf16_512:
+              GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_512">,
+              Intrinsic<[llvm_v32i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty],
+              [IntrNoMem]>;
+  // Intrinsic must be masked due to it producing less than 128 bits of results.
+  def int_x86_avx512bf16_mask_cvtneps2bf16_128:
+              Intrinsic<[llvm_v8i16_ty],
+                        [llvm_v4f32_ty, llvm_v8i16_ty, llvm_v4i1_ty],
+                        [IntrNoMem]>;
+  def int_x86_avx512bf16_cvtneps2bf16_256:
+              GCCBuiltin<"__builtin_ia32_cvtneps2bf16_256">,
+              Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+  def int_x86_avx512bf16_cvtneps2bf16_512:
+              GCCBuiltin<"__builtin_ia32_cvtneps2bf16_512">,
+              Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty], [IntrNoMem]>;
+  def int_x86_avx512bf16_dpbf16ps_128:
+              GCCBuiltin<"__builtin_ia32_dpbf16ps_128">,
+              Intrinsic<[llvm_v4f32_ty],
+              [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_avx512bf16_dpbf16ps_256:
+              GCCBuiltin<"__builtin_ia32_dpbf16ps_256">,
+              Intrinsic<[llvm_v8f32_ty],
+              [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+  def int_x86_avx512bf16_dpbf16ps_512:
+              GCCBuiltin<"__builtin_ia32_dpbf16ps_512">,
+              Intrinsic<[llvm_v16f32_ty],
+              [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
+}
author	Luo, Yuanke <yuanke.luo@intel.com>	2019-05-06 08:22:37 +0000
committer	Luo, Yuanke <yuanke.luo@intel.com>	2019-05-06 08:22:37 +0000
commit	beec41c656e7d716fd5755cce12e4934fdced267 (patch)
tree	cbb53258bcd3f11adc2ee6a8467def1692b30623 /llvm/include
parent	fb607580046ed9fe2891151a23375f0c524d29b3 (diff)
download	bcm5719-llvm-beec41c656e7d716fd5755cce12e4934fdced267.tar.gz bcm5719-llvm-beec41c656e7d716fd5755cce12e4934fdced267.zip