diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 33 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 103 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 8 |
5 files changed, 94 insertions, 59 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5cda0dc7220..0d781b5fe4f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -16310,7 +16310,9 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, /// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the /// necessary casting for \p Mask when lowering masking intrinsics. static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, - SDValue PreservedSrc, SelectionDAG &DAG) { + SDValue PreservedSrc, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { EVT VT = Op.getValueType(); EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VT.getVectorNumElements()); @@ -16337,7 +16339,8 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, case X86ISD::CMPMU: return DAG.getNode(ISD::AND, dl, VT, Op, VMask); } - + if (PreservedSrc.getOpcode() == ISD::UNDEF) + PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); return DAG.getNode(ISD::VSELECT, dl, VT, VMask, Op, PreservedSrc); } @@ -16389,10 +16392,11 @@ static unsigned getOpcodeForFMAIntrinsic(unsigned IntNo) { } } -static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { SDLoc dl(Op); unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - + EVT VT = Op.getValueType(); const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo); if (IntrData) { switch(IntrData->Type) { @@ -16404,6 +16408,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { case INTR_TYPE_3OP: return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case INTR_TYPE_1OP_MASK_RM: { + SDValue Src = Op.getOperand(1); + SDValue Src0 = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + SDValue RoundingMode = Op.getOperand(4); + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src, + RoundingMode), + Mask, Src0, Subtarget, DAG); + } + case CMP_MASK: case CMP_MASK_CC: { // Comparison intrinsics with masks. @@ -16431,7 +16445,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { Op.getOperand(2)); } SDValue CmpMask = getVectorMaskingNode(Cmp, Mask, - DAG.getTargetConstant(0, MaskVT), DAG); + DAG.getTargetConstant(0, MaskVT), + Subtarget, DAG); SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT, DAG.getUNDEF(BitcastVT), CmpMask, DAG.getIntPtrConstant(0)); @@ -16598,7 +16613,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { Op.getValueType(), Op.getOperand(2), Op.getOperand(1), Op.getOperand(3)), - Op.getOperand(5), Op.getOperand(4), DAG); + Op.getOperand(5), Op.getOperand(4), + Subtarget, DAG); // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest @@ -16772,7 +16788,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)), - Op.getOperand(4), Op.getOperand(1), DAG); + Op.getOperand(4), Op.getOperand(1), + Subtarget, DAG); else return SDValue(); } @@ -18887,7 +18904,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG); - case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, Subtarget, DAG); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 495cf04d4b2..7c6ffa2afa2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -419,6 +419,9 @@ namespace llvm { // Test if in transactional execution. XTEST, + // ERI instructions + RSQRT28, RCP28, EXP2, + // Compare and swap. LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, LCMPXCHG8_DAG, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 3ff37d45376..596d2c3bb4b 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -146,20 +146,21 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F, list<dag> Pattern, list<dag> MaskingPattern, list<dag> ZeroMaskingPattern, + string Round = "", string MaskingConstraint = "", InstrItinClass itin = NoItinerary, bit IsCommutable = 0> { let isCommutable = IsCommutable in def NAME: AVX512<O, F, Outs, Ins, - OpcodeStr#"\t{"#AttSrcAsm#", $dst|"# - "$dst, "#IntelSrcAsm#"}", + OpcodeStr#"\t{"#AttSrcAsm#", $dst "#Round#"|"# + "$dst "#Round#", "#IntelSrcAsm#"}", Pattern, itin>; // Prefer over VMOV*rrk Pat<> let AddedComplexity = 20 in def NAME#k: AVX512<O, F, Outs, MaskingIns, - OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"# - "$dst {${mask}}, "#IntelSrcAsm#"}", + OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}"#Round#"|"# + "$dst {${mask}}"#Round#", "#IntelSrcAsm#"}", MaskingPattern, itin>, EVEX_K { // In case of the 3src subclass this is overridden with a let. @@ -167,8 +168,8 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F, } let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<> def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns, - OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"# - "$dst {${mask}} {z}, "#IntelSrcAsm#"}", + OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}"#Round#"|"# + "$dst {${mask}} {z}"#Round#", "#IntelSrcAsm#"}", ZeroMaskingPattern, itin>, EVEX_KZ; @@ -182,6 +183,7 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, dag MaskingRHS, + string Round = "", string MaskingConstraint = "", InstrItinClass itin = NoItinerary, bit IsCommutable = 0> : @@ -191,7 +193,7 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _, [(set _.RC:$dst, MaskingRHS)], [(set _.RC:$dst, (vselect _.KRCWM:$mask, RHS, _.ImmAllZerosV))], - MaskingConstraint, NoItinerary, IsCommutable>; + Round, MaskingConstraint, NoItinerary, IsCommutable>; // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the instruction. In the masking case, the @@ -199,13 +201,14 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _, multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, InstrItinClass itin = NoItinerary, + dag RHS, string Round = "", + InstrItinClass itin = NoItinerary, bit IsCommutable = 0> : AVX512_maskable_common<O, F, _, Outs, Ins, !con((ins _.RC:$src0, _.KRCWM:$mask), Ins), !con((ins _.KRCWM:$mask), Ins), OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, - (vselect _.KRCWM:$mask, RHS, _.RC:$src0), + (vselect _.KRCWM:$mask, RHS, _.RC:$src0), Round, "$src0 = $dst", itin, IsCommutable>; // Similar to AVX512_maskable but in this case one of the source operands @@ -232,7 +235,7 @@ multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _, AVX512_maskable_custom<O, F, Outs, Ins, !con((ins _.RC:$src0, _.KRCWM:$mask), Ins), !con((ins _.KRCWM:$mask), Ins), - OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [], + OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [], "", "$src0 = $dst">; // Bitcasts between 512-bit vector types. Return the original type since @@ -2626,7 +2629,7 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, _.RC:$src2)), - itins.rr, IsCommutable>, + "", itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V; let mayLoad = 1 in @@ -2635,7 +2638,7 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))), - itins.rm>, + "", itins.rm>, AVX512BIBase, EVEX_4V; } @@ -2651,7 +2654,7 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, (_.VT (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2)))), - itins.rm>, + "", itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B; } @@ -4199,44 +4202,44 @@ def : Pat <(v2f64 (int_x86_avx512_rsqrt28_sd (v2f64 VR128X:$src1), (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>; /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd -multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, - RegisterClass RC, X86MemOperand x86memop> { - let hasSideEffects = 0, Predicates = [HasERI] in { - def r : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), - !strconcat(OpcodeStr, - " \t{$src, $dst|$dst, $src}"), - []>, EVEX; - def rb : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), - !strconcat(OpcodeStr, - " \t{{sae}, $src, $dst|$dst, $src, {sae}}"), - []>, EVEX, EVEX_B; - def m : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), - []>, EVEX; - } + +multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, + SDNode OpNode> { + + defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src), OpcodeStr, "$src", "$src", + (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>; + + defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src), OpcodeStr, + "$src", "$src", + (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC)), "{sae}">, EVEX_B; + + defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.MemOp:$src), OpcodeStr, "$src", "$src", + (OpNode (_.FloatVT + (bitconvert (_.LdFrag addr:$src))), (i32 FROUND_CURRENT))>; + + defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.MemOp:$src), OpcodeStr, "$src", "$src", + (OpNode (_.FloatVT + (X86VBroadcast (_.ScalarLdFrag addr:$src))), + (i32 FROUND_CURRENT))>, EVEX_B; +} + +multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode> { + defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode>, + EVEX_CD8<32, CD8VF>; + defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode>, + VEX_W, EVEX_CD8<32, CD8VF>; +} + +let Predicates = [HasERI], hasSideEffects = 0 in { + + defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX, EVEX_V512, T8PD; + defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX, EVEX_V512, T8PD; + defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX, EVEX_V512, T8PD; } -defm VRSQRT28PSZ : avx512_fp28_p<0xCC, "vrsqrt28ps", VR512, f512mem>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VRSQRT28PDZ : avx512_fp28_p<0xCC, "vrsqrt28pd", VR512, f512mem>, - VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; -defm VRCP28PSZ : avx512_fp28_p<0xCA, "vrcp28ps", VR512, f512mem>, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VRCP28PDZ : avx512_fp28_p<0xCA, "vrcp28pd", VR512, f512mem>, - VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; - -def : Pat <(v16f32 (int_x86_avx512_rsqrt28_ps (v16f32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)), - (VRSQRT28PSZrb VR512:$src)>; -def : Pat <(v8f64 (int_x86_avx512_rsqrt28_pd (v8f64 VR512:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)), - (VRSQRT28PDZrb VR512:$src)>; - -def : Pat <(v16f32 (int_x86_avx512_rcp28_ps (v16f32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_NO_EXC)), - (VRCP28PSZrb VR512:$src)>; -def : Pat <(v8f64 (int_x86_avx512_rcp28_pd (v8f64 VR512:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_NO_EXC)), - (VRCP28PDZrb VR512:$src)>; multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _>{ diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 2badbb7d76b..1c7215ccbf3 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -203,6 +203,8 @@ def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; +def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, + SDTCisVec<0>, SDTCisInt<2>]>; def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>; def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>; @@ -261,6 +263,10 @@ def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>; def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>; def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>; +def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", STDFp1SrcRm>; +def X86rcp28 : SDNode<"X86ISD::RCP28", STDFp1SrcRm>; +def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>; + def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>, SDTCisVT<4, i8>]>; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 4abbdd64107..c1f6c200d47 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -20,7 +20,7 @@ enum IntrinsicType { INTR_NO_TYPE, GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, - CMP_MASK, CMP_MASK_CC, VSHIFT, COMI + CMP_MASK, CMP_MASK_CC, VSHIFT, COMI, INTR_TYPE_1OP_MASK_RM }; struct IntrinsicData { @@ -156,6 +156,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0), X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0), X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0), + X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0), + X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::EXP2, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0), @@ -204,6 +206,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0), + X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0), + X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0), + X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0), + X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0), X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0), |

