author     Luo, Yuanke <yuanke.luo@intel.com>  2019-05-06 08:22:37 +0000
committer  Luo, Yuanke <yuanke.luo@intel.com>  2019-05-06 08:22:37 +0000
commit     beec41c656e7d716fd5755cce12e4934fdced267 (patch)
tree       cbb53258bcd3f11adc2ee6a8467def1692b30623 /llvm/include
parent     fb607580046ed9fe2891151a23375f0c524d29b3 (diff)
Enable AVX512_BF16 instructions, which are supported for BFLOAT16 in Cooper Lake
Summary:
1. Enable the AVX512_BF16 infrastructure, which supports BFLOAT16 in Cooper Lake.
2. Enable the VCVTNE2PS2BF16, VCVTNEPS2BF16 and VDPBF16PS instructions, which are Vector Neural Network Instructions supporting BFLOAT16 inputs, together with conversion instructions from IEEE single precision.

VCVTNE2PS2BF16: Convert Two Packed Single Data to One Packed BF16 Data.
VCVTNEPS2BF16: Convert Packed Single Data to Packed BF16 Data.
VDPBF16PS: Dot Product of BF16 Pairs Accumulated into Packed Single Precision.

For more details about the BF16 ISA, please refer to the latest ISE document:
https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference

Author: LiuTianle

Reviewers: craig.topper, smaslov, LuoYuanke, wxiao3, annita.zhang, RKSimon, spatel

Reviewed By: craig.topper

Subscribers: kristina, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60550

llvm-svn: 360017
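At the IR level these definitions surface as intrinsics in the llvm.x86.avx512bf16.* namespace (int_x86_* TableGen names map to dotted llvm.x86.* names by the usual convention). Below is a minimal LLVM IR sketch, assuming that name mapping and using illustrative function and value names, of a 512-bit conversion feeding the BF16 dot-product intrinsic:

; Minimal sketch; function and value names are illustrative only.
declare <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float>, <16 x float>)
declare <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float>, <16 x i32>, <16 x i32>)

define <16 x float> @bf16_dot_sketch(<16 x float> %acc, <16 x float> %a, <16 x float> %b) {
  ; Convert two <16 x float> sources into one register of 32 packed bf16
  ; values, represented here as <32 x i16>.
  %packed = call <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %a, <16 x float> %b)
  ; Reinterpret the packed bf16 data as <16 x i32>, i.e. one bf16 pair per
  ; 32-bit lane, which is the layout the dot-product intrinsic expects.
  %pairs = bitcast <32 x i16> %packed to <16 x i32>
  ; Dot product of bf16 pairs accumulated into packed single precision.
  %res = call <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> %acc, <16 x i32> %pairs, <16 x i32> %pairs)
  ret <16 x float> %res
}

The bitcast only reinterprets the 512 bits of packed bf16 data in the pair layout; no value conversion happens at that step.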
Diffstat (limited to 'llvm/include')
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsX86.td  38
1 file changed, 38 insertions, 0 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 06b603a788b..2635e3d8648 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -4834,3 +4834,41 @@ let TargetPrefix = "x86" in {
def int_x86_invpcid : GCCBuiltin<"__builtin_ia32_invpcid">,
Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>;
}
+
+let TargetPrefix = "x86" in {
+ def int_x86_avx512bf16_cvtne2ps2bf16_128:
+ GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512bf16_cvtne2ps2bf16_256:
+ GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512bf16_cvtne2ps2bf16_512:
+ GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_512">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty],
+ [IntrNoMem]>;
+ // Intrinsic must be masked because it produces fewer than 128 bits of results.
+ def int_x86_avx512bf16_mask_cvtneps2bf16_128:
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v4f32_ty, llvm_v8i16_ty, llvm_v4i1_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512bf16_cvtneps2bf16_256:
+ GCCBuiltin<"__builtin_ia32_cvtneps2bf16_256">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_avx512bf16_cvtneps2bf16_512:
+ GCCBuiltin<"__builtin_ia32_cvtneps2bf16_512">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty], [IntrNoMem]>;
+ def int_x86_avx512bf16_dpbf16ps_128:
+ GCCBuiltin<"__builtin_ia32_dpbf16ps_128">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx512bf16_dpbf16ps_256:
+ GCCBuiltin<"__builtin_ia32_dpbf16ps_256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+ def int_x86_avx512bf16_dpbf16ps_512:
+ GCCBuiltin<"__builtin_ia32_dpbf16ps_512">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
+}
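As the comment in the patch notes, the 128-bit VCVTNEPS2BF16 form produces fewer than 128 bits of results, so it is only exposed as a masked intrinsic with explicit passthrough and mask operands. A hedged LLVM IR sketch of a call to it, with illustrative names and the signature read off the definition above:

; Sketch of the mask-required 128-bit form (illustrative names; signature from
; the definition above: result <8 x i16>, operands <4 x float>, <8 x i16>, <4 x i1>).
declare <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float>, <8 x i16>, <4 x i1>)

define <8 x i16> @cvt_ps_to_bf16_masked(<4 x float> %src, <8 x i16> %passthru, <4 x i1> %k) {
  ; Only four bf16 results are produced from %src; lanes cleared in %k take
  ; their value from %passthru.
  %r = call <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> %src, <8 x i16> %passthru, <4 x i1> %k)
  ret <8 x i16> %r
}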