diff options
author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2015-02-18 07:59:20 +0000 |
---|---|---|
committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2015-02-18 07:59:20 +0000 |
commit | 714f23bcdb80fe4609a6a7fc290e1527878e4732 (patch) | |
tree | f659593aa8c02fe3e2007a48d05b0ad7e1d7d08a /llvm/lib | |
parent | 7ff7eb706a0b05548efa696559b0a0a67ef5bf35 (diff) | |
download | bcm5719-llvm-714f23bcdb80fe4609a6a7fc290e1527878e4732.tar.gz bcm5719-llvm-714f23bcdb80fe4609a6a7fc290e1527878e4732.zip |
AVX-512: Added support for FP instructions with embedded rounding mode.
By Asaf Badouh <asaf.badouh@intel.com>
llvm-svn: 229645
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 23 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 7 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 30 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 17 |
5 files changed, 73 insertions, 12 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6255cce0d2b..063d7b8c4f7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -17595,22 +17595,26 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Mask, Src0, Subtarget, DAG); } case INTR_TYPE_2OP_MASK: { - SDValue Mask = Op.getOperand(4); + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); SDValue PassThru = Op.getOperand(3); + SDValue Mask = Op.getOperand(4); + // We specify 2 possible opcodes for intrinsics with rounding modes. + // First, we check if the intrinsic may have non-default rounding mode, + // (IntrData->Opc1 != 0), then we check the rounding mode operand. unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { - unsigned Round = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue(); + SDValue Rnd = Op.getOperand(5); + unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue(); if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) { return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2), - Op.getOperand(3), Op.getOperand(5)), + Src1, Src2, Rnd), Mask, PassThru, Subtarget, DAG); } } return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, - Op.getOperand(1), - Op.getOperand(2)), + Src1,Src2), Mask, PassThru, Subtarget, DAG); } case FMA_OP_MASK: { @@ -17618,6 +17622,9 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); SDValue Mask = Op.getOperand(4); + // We specify 2 possible opcodes for intrinsics with rounding modes. + // First, we check if the intrinsic may have non-default rounding mode, + // (IntrData->Opc1 != 0), then we check the rounding mode operand. unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; if (IntrWithRoundingModeOpcode != 0) { SDValue Rnd = Op.getOperand(5); @@ -20585,6 +20592,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ADDSUB: return "X86ISD::ADDSUB"; case X86ISD::RCP28: return "X86ISD::RCP28"; case X86ISD::RSQRT28: return "X86ISD::RSQRT28"; + case X86ISD::FADD_RND: return "X86ISD::FADD_RND"; + case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND"; + case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND"; + case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND"; } } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index eb904f31788..5d69c1fa188 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -201,7 +201,12 @@ namespace llvm { /// ADDSUB - Combined add and sub on an FP vector. ADDSUB, - + // FADD, FSUB, FMUL, FDIV, FMIN, FMAX - FP vector ops with rounding mode. + FADD_RND, + FSUB_RND, + FMUL_RND, + FDIV_RND, + // SUBUS - Integer sub with unsigned saturation. SUBUS, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 43c0f45ed11..dd5cc911b1e 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -3269,7 +3269,16 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, }//let mayLoad = 1 } -multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, +multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, + X86VectorVTInfo _, bit IsCommutable> { + defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix, + "$rc, $src2, $src1", "$src1, $src2, $rc", + (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>, + EVEX_4V, EVEX_B, EVEX_RC; +} + +multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, bit IsCommutable = 0> { defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>, EVEX_V512, PS, @@ -3295,12 +3304,23 @@ multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, } } -defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>; -defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>; +multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> { + defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info, 0>, + EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info, 0>, + EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; +} + +defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>, + avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>; +defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>, + avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>; +defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>, + avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>; +defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>, + avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>; defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, 1>; defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, 1>; -defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>; -defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>; def : Pat<(v16f32 (int_x86_avx512_mask_max_ps_512 (v16f32 VR512:$src1), (v16f32 VR512:$src2), (bc_v16f32 (v16i32 immAllZerosV)), diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 1f4b49f7099..f8590e59dbf 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -212,6 +212,9 @@ def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>; def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>; +def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc. + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>; + def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>, @@ -271,6 +274,11 @@ def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>; def X86Addsub : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>; +def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>; +def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>; +def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>; +def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>; + def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>; def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 1cbb8968c30..d32b448c619 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -244,6 +244,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0), X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0), X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0), + X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD, + X86ISD::FADD_RND), + X86_INTRINSIC_DATA(avx512_mask_add_ps_512, INTR_TYPE_2OP_MASK, ISD::FADD, + X86ISD::FADD_RND), X86_INTRINSIC_DATA(avx512_mask_blend_b_128, BLEND, X86ISD::SELECT, 0), X86_INTRINSIC_DATA(avx512_mask_blend_b_256, BLEND, X86ISD::SELECT, 0), X86_INTRINSIC_DATA(avx512_mask_blend_b_512, BLEND, X86ISD::SELECT, 0), @@ -299,6 +303,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_EXPAND_IN_REG, X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_div_pd_512, INTR_TYPE_2OP_MASK, ISD::FDIV, + X86ISD::FDIV_RND), + X86_INTRINSIC_DATA(avx512_mask_div_ps_512, INTR_TYPE_2OP_MASK, ISD::FDIV, + X86ISD::FDIV_RND), X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG, X86ISD::EXPAND, 0), X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG, @@ -323,6 +331,11 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::EXPAND, 0), X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG, X86ISD::EXPAND, 0), + + X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL, + X86ISD::FMUL_RND), + X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL, + X86ISD::FMUL_RND), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0), @@ -365,6 +378,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB, + X86ISD::FSUB_RND), + X86_INTRINSIC_DATA(avx512_mask_sub_ps_512, INTR_TYPE_2OP_MASK, ISD::FSUB, + X86ISD::FSUB_RND), X86_INTRINSIC_DATA(avx512_mask_ucmp_b_128, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_ucmp_b_256, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_ucmp_b_512, CMP_MASK_CC, X86ISD::CMPMU, 0), |