diff options
| author | Luo, Yuanke <yuanke.luo@intel.com> | 2019-05-06 08:22:37 +0000 |
|---|---|---|
| committer | Luo, Yuanke <yuanke.luo@intel.com> | 2019-05-06 08:22:37 +0000 |
| commit | beec41c656e7d716fd5755cce12e4934fdced267 (patch) | |
| tree | cbb53258bcd3f11adc2ee6a8467def1692b30623 /llvm/include | |
| parent | fb607580046ed9fe2891151a23375f0c524d29b3 (diff) | |
| download | bcm5719-llvm-beec41c656e7d716fd5755cce12e4934fdced267.tar.gz bcm5719-llvm-beec41c656e7d716fd5755cce12e4934fdced267.zip | |
Enable AVX512_BF16 instructions, which are supported for BFLOAT16 in Cooper Lake
Summary:
1. Enable infrastructure of AVX512_BF16, which is supported for BFLOAT16 in Cooper Lake;
2. Enable VCVTNE2PS2BF16, VCVTNEPS2BF16 and DPBF16PS instructions, which are Vector Neural Network Instructions supporting BFLOAT16 inputs and conversion instructions from IEEE single precision.
VCVTNE2PS2BF16: Convert Two Packed Single Data to One Packed BF16 Data.
VCVTNEPS2BF16: Convert Packed Single Data to Packed BF16 Data.
VDPBF16PS: Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
For more details about the BF16 ISA, please refer to the latest ISE document: https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference
Author: LiuTianle
Reviewers: craig.topper, smaslov, LuoYuanke, wxiao3, annita.zhang, RKSimon, spatel
Reviewed By: craig.topper
Subscribers: kristina, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D60550
llvm-svn: 360017
Diffstat (limited to 'llvm/include')
| -rw-r--r-- | llvm/include/llvm/IR/IntrinsicsX86.td | 38 |
1 files changed, 38 insertions, 0 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 06b603a788b..2635e3d8648 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -4834,3 +4834,41 @@ let TargetPrefix = "x86" in {
   def int_x86_invpcid : GCCBuiltin<"__builtin_ia32_invpcid">,
                         Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>;
 }
+
+let TargetPrefix = "x86" in {
+  def int_x86_avx512bf16_cvtne2ps2bf16_128:
+              GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_128">,
+              Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+              [IntrNoMem]>;
+  def int_x86_avx512bf16_cvtne2ps2bf16_256:
+              GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_256">,
+              Intrinsic<[llvm_v16i16_ty], [llvm_v8f32_ty, llvm_v8f32_ty],
+              [IntrNoMem]>;
+  def int_x86_avx512bf16_cvtne2ps2bf16_512:
+              GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_512">,
+              Intrinsic<[llvm_v32i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty],
+              [IntrNoMem]>;
+  // Intrinsic must be masked due to it producing less than 128 bits of results.
+  def int_x86_avx512bf16_mask_cvtneps2bf16_128:
+              Intrinsic<[llvm_v8i16_ty],
+              [llvm_v4f32_ty, llvm_v8i16_ty, llvm_v4i1_ty],
+              [IntrNoMem]>;
+  def int_x86_avx512bf16_cvtneps2bf16_256:
+              GCCBuiltin<"__builtin_ia32_cvtneps2bf16_256">,
+              Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+  def int_x86_avx512bf16_cvtneps2bf16_512:
+              GCCBuiltin<"__builtin_ia32_cvtneps2bf16_512">,
+              Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty], [IntrNoMem]>;
+  def int_x86_avx512bf16_dpbf16ps_128:
+              GCCBuiltin<"__builtin_ia32_dpbf16ps_128">,
+              Intrinsic<[llvm_v4f32_ty],
+              [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+  def int_x86_avx512bf16_dpbf16ps_256:
+              GCCBuiltin<"__builtin_ia32_dpbf16ps_256">,
+              Intrinsic<[llvm_v8f32_ty],
+              [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+  def int_x86_avx512bf16_dpbf16ps_512:
+              GCCBuiltin<"__builtin_ia32_dpbf16ps_512">,
+              Intrinsic<[llvm_v16f32_ty],
+              [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
+}

