summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorAsaf Badouh <asaf.badouh@intel.com>2015-10-27 15:37:17 +0000
committerAsaf Badouh <asaf.badouh@intel.com>2015-10-27 15:37:17 +0000
commitc7cb88066990d9b794c208252e13ef48a56b2439 (patch)
tree692997512139343c1f5773ca32d066a90f5d7779 /llvm/lib/Target
parentce0d48adbb8ac9350caf7513ed4e5e99404f0d0f (diff)
downloadbcm5719-llvm-c7cb88066990d9b794c208252e13ef48a56b2439.tar.gz
bcm5719-llvm-c7cb88066990d9b794c208252e13ef48a56b2439.zip
[X86][AVX512] add convert float to half
Convert float to half with mask/maskz for the reg-to-reg version and mask for the reg-to-mem version (there is no maskz version for reg-to-mem). Differential Revision: http://reviews.llvm.org/D14113 llvm-svn: 251409
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td62
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td5
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h6
3 files changed, 52 insertions, 21 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 5a2a9c7d788..ca124ed253c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -5345,8 +5345,7 @@ multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
}
}
-multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
- X86MemOperand x86memop> {
+multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
"vcvtph2ps", "{sae}, $src", "$src, {sae}",
(X86cvtph2ps (_src.VT _src.RC:$src),
@@ -5356,7 +5355,7 @@ multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
let Predicates = [HasAVX512] in {
defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64>,
- avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, f256mem>,
+ avx512_cvtph2ps_sae<v16f32_info, v16i16x_info>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
let Predicates = [HasVLX] in {
defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
@@ -5366,25 +5365,46 @@ let Predicates = [HasAVX512] in {
}
}
-multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
- X86MemOperand x86memop> {
- def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
- (ins srcRC:$src1, i32u8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX;
- let hasSideEffects = 0, mayStore = 1 in
- def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
- (ins x86memop:$dst, srcRC:$src1, i32u8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
+multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
+ X86MemOperand x86memop> {
+ defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
+ (ins _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph", "$src2, $src1", "$src1, $src2",
+ (X86cvtps2ph (_src.VT _src.RC:$src1),
+ (i32 imm:$src2),
+ (i32 FROUND_CURRENT))>, AVX512AIi8Base;
+ let hasSideEffects = 0, mayStore = 1 in {
+ def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
+ (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1),
+ (i32 imm:$src2), (i32 FROUND_CURRENT) )),
+ addr:$dst)]>;
+ def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
+ (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
+ []>, EVEX_K;
+ }
+}
+multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
+ defm rb : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
+ (ins _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph", "$src2, {sae}, $src1", "$src1, $src2, {sae}",
+ (X86cvtps2ph (_src.VT _src.RC:$src1),
+ (i32 imm:$src2),
+ (i32 FROUND_NO_EXC))>, EVEX_B, AVX512AIi8Base;
+}
+let Predicates = [HasAVX512] in {
+ defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem>,
+ avx512_cvtps2ph_sae<v16i16x_info, v16f32_info>,
+ EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
+ let Predicates = [HasVLX] in {
+ defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem>,
+ EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
+ defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f128mem>,
+ EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
+ }
}
-
-defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>, EVEX_V512,
- EVEX_CD8<32, CD8VH>;
-
-def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src),
- imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
- (VCVTPS2PHZrr VR512:$src, imm:$rc)>;
-
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
"ucomiss">, PS, EVEX, VEX_LIG,
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 7a8edc7964b..d7c90c1780a 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -513,6 +513,11 @@ def X86cvtph2ps : SDNode<"ISD::FP16_TO_FP",
SDTCVecEltisVT<1, i16>,
SDTCisFP<0>, SDTCisInt<2>]> >;
+def X86cvtps2ph : SDNode<"ISD::FP_TO_FP16",
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCisFP<1>, SDTCisInt<2>, SDTCisInt<3>]> >;
def X86vfpextRnd : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisFP<1>,
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index ee686609300..12f9ae0f696 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1362,6 +1362,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
ISD::FP16_TO_FP, 0),
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK_RM,
ISD::FP16_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, INTR_TYPE_2OP_MASK_RM,
+ ISD::FP_TO_FP16, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, INTR_TYPE_2OP_MASK_RM,
+ ISD::FP_TO_FP16, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, INTR_TYPE_2OP_MASK_RM,
+ ISD::FP_TO_FP16, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
OpenPOWER on IntegriCloud