diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 203 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 135 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 4 |
3 files changed, 180 insertions, 162 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 044682c851a..a8ae3626638 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2642,39 +2642,42 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, - X86FoldableSchedWrite sched, Predicate prd, + X86SchedWriteWidths sched, Predicate prd, string broadcast>{ let Predicates = [prd] in { - defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched, + defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, "{z}", broadcast>, EVEX_V512; } let Predicates = [prd, HasVLX] in { - defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched, + defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.XMM, _.info128, "{x}", broadcast>, EVEX_V128; - defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched, + defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.YMM, _.info256, "{y}", broadcast>, EVEX_V256; } } -// FIXME: Is there a better scheduler class for VFPCLASS? multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec, - bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{ + bits<8> opcScalar, SDNode VecOpNode, + SDNode ScalarOpNode, X86SchedWriteWidths sched, + Predicate prd> { defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec, - VecOpNode, WriteFAdd, prd, "{l}">, + VecOpNode, sched, prd, "{l}">, EVEX_CD8<32, CD8VF>; defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec, - VecOpNode, WriteFAdd, prd, "{q}">, + VecOpNode, sched, prd, "{q}">, EVEX_CD8<64, CD8VF> , VEX_W; defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode, - WriteFAdd, f32x_info, prd>, + sched.Scl, f32x_info, prd>, EVEX_CD8<32, CD8VT1>; defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode, - WriteFAdd, f64x_info, prd>, + sched.Scl, f64x_info, prd>, EVEX_CD8<64, CD8VT1>, VEX_W; } +// FIXME: Is there a better scheduler class for VFPCLASS? defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass, - X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX; + X86Vfpclasss, SchedWriteFAdd, HasDQI>, + AVX512AIi8Base, EVEX; //----------------------------------------------------------------- // Mask register copy, including @@ -4811,6 +4814,7 @@ defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, //===----------------------------------------------------------------------===// // AVX-512 FP arithmetic //===----------------------------------------------------------------------===// + multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode, SDNode VecNode, X86FoldableSchedWrite sched, bit IsCommutable> { @@ -4925,14 +4929,18 @@ multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, VecNode, SaeNode, sched, IsCommutable>, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } -defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, WriteFAdd, 1>; -defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, WriteFMul, 1>; -defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, WriteFAdd, 0>; -defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, WriteFDiv, 0>; -defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds, - WriteFCmp, 0>; -defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds, - WriteFCmp, 0>; +defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, + SchedWriteFAdd.Scl, 1>; +defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, + SchedWriteFMul.Scl, 1>; +defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, + SchedWriteFAdd.Scl, 0>; +defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, + SchedWriteFDiv.Scl, 0>; +defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds, + SchedWriteFCmp.Scl, 0>; +defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds, + SchedWriteFCmp.Scl, 0>; // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use // X86fminc and X86fmaxc instead of X86fmin and X86fmax @@ -5247,33 +5255,36 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, } } -multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> { - defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v16f32_info>, - avx512_fp_round_packed<opc, OpcodeStr, OpNode, WriteFAdd, v16f32_info>, +multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, + SDNode OpNode, SDNode OpNodeScal, + X86SchedWriteWidths sched> { + defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>, + avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v8f64_info>, - avx512_fp_round_packed<opc, OpcodeStr, OpNode, WriteFAdd, v8f64_info>, + defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>, + avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, WriteFAdd, f32x_info>, - avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, WriteFAdd>, + defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f32x_info>, + avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, sched.Scl>, EVEX_4V,EVEX_CD8<32, CD8VT1>; - defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, WriteFAdd, f64x_info>, - avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, WriteFAdd>, + defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f64x_info>, + avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, sched.Scl>, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { - defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v4f32x_info>, + defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v4f32x_info>, EVEX_V128, EVEX_CD8<32, CD8VF>; - defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v8f32x_info>, + defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v8f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VF>; - defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v2f64x_info>, + defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v2f64x_info>, EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; - defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v4f64x_info>, + defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v4f64x_info>, EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; } } -defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs>, T8PD; +defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs, + SchedWriteFAdd>, T8PD; //===----------------------------------------------------------------------===// // AVX-512 VPTESTM instructions @@ -7947,14 +7958,14 @@ multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode, } let Predicates = [HasERI] in { - defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, WriteFRcp>, - T8PD, EVEX_4V; - defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, WriteFRsqrt>, + defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SchedWriteFRcp.Scl>, T8PD, EVEX_4V; + defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, + SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V; } -defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, WriteFAdd>, - T8PD, EVEX_4V; +defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, + SchedWriteFAdd.Scl>, T8PD, EVEX_4V; /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, @@ -7992,38 +8003,38 @@ multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, } multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched> { - defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched>, - avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched>, + X86SchedWriteWidths sched> { + defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>, + avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; - defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched>, - avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched>, + defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>, + avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>, T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; } multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr, - SDNode OpNode, X86FoldableSchedWrite sched> { + SDNode OpNode, X86SchedWriteWidths sched> { // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { - defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched>, + defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched.XMM>, EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; - defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched>, + defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched.YMM>, EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; - defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched>, + defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched.XMM>, EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; - defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched>, + defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched.YMM>, EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; } } -let Predicates = [HasERI] in { - defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, WriteFRsqrt>, EVEX; - defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, WriteFRcp>, EVEX; - defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, WriteFAdd>, EVEX; +let Predicates = [HasERI] in { + defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SchedWriteFRsqrt>, EVEX; + defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX; + defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX; } -defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, WriteFAdd>, +defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFAdd>, avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd, - WriteFAdd>, EVEX; + SchedWriteFAdd>, EVEX; multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _>{ @@ -8234,12 +8245,15 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, } } -defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", WriteFAdd, - f32x_info>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>; +defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", + SchedWriteFAdd.Scl, f32x_info>, + AVX512AIi8Base, EVEX_4V, + EVEX_CD8<32, CD8VT1>; -defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", WriteFAdd, - f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V, - EVEX_CD8<64, CD8VT1>; +defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", + SchedWriteFAdd.Scl, f64x_info>, + VEX_W, AVX512AIi8Base, EVEX_4V, + EVEX_CD8<64, CD8VT1>; //------------------------------------------------- // Integer truncate and extend operations @@ -9128,17 +9142,17 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr, AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, - SDNode OpNodeRnd, X86FoldableSchedWrite sched, Predicate prd>{ + SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{ let Predicates = [prd] in { - defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched, + defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, - sched, _.info512>, EVEX_V512; + sched.ZMM, _.info512>, EVEX_V512; } let Predicates = [prd, HasVLX] in { - defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched, + defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128; - defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched, + defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256; } } @@ -9273,17 +9287,17 @@ multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr, AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, - SDNode OpNodeRnd, X86FoldableSchedWrite sched, Predicate prd>{ + SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{ let Predicates = [prd] in { - defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched, _.info512>, - avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched, _.info512>, + defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, + avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512>, EVEX_V512; } let Predicates = [prd, HasVLX] in { - defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched, _.info128>, + defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128; - defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched, _.info256>, + defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256; } } @@ -9320,16 +9334,16 @@ multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _, multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr, X86VectorVTInfo _, bits<8> opc, SDNode OpNode, - SDNode OpNodeRnd, X86FoldableSchedWrite sched, Predicate prd>{ + SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd> { let Predicates = [prd] in { - defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched, _>, - avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched, _>; + defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>, + avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched.XMM, _>; } } multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr, bits<8> opcPs, bits<8> opcPd, SDNode OpNode, - SDNode OpNodeRnd, X86FoldableSchedWrite sched, Predicate prd>{ + SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{ defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info, opcPs, OpNode, OpNodeRnd, sched, prd>, EVEX_CD8<32, CD8VF>; @@ -9339,43 +9353,43 @@ multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr, } defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, - X86VReduce, X86VReduceRnd, WriteFAdd, HasDQI>, + X86VReduce, X86VReduceRnd, SchedWriteFAdd, HasDQI>, AVX512AIi8Base, EVEX; defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, - X86VRndScale, X86VRndScaleRnd, WriteFAdd, HasAVX512>, + X86VRndScale, X86VRndScaleRnd, SchedWriteFAdd, HasAVX512>, AVX512AIi8Base, EVEX; defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26, - X86VGetMant, X86VGetMantRnd, WriteFAdd, HasAVX512>, + X86VGetMant, X86VGetMantRnd, SchedWriteFAdd, HasAVX512>, AVX512AIi8Base, EVEX; defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, 0x50, X86VRange, X86VRangeRnd, - WriteFAdd, HasDQI>, + SchedWriteFAdd, HasDQI>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info, 0x50, X86VRange, X86VRangeRnd, - WriteFAdd, HasDQI>, + SchedWriteFAdd, HasDQI>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", - f64x_info, 0x51, X86Ranges, X86RangesRnd, WriteFAdd, HasDQI>, + f64x_info, 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, - 0x51, X86Ranges, X86RangesRnd, WriteFAdd, HasDQI>, + 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, - 0x57, X86Reduces, X86ReducesRnd, WriteFAdd, HasDQI>, + 0x57, X86Reduces, X86ReducesRnd, SchedWriteFAdd, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, - 0x57, X86Reduces, X86ReducesRnd, WriteFAdd, HasDQI>, + 0x57, X86Reduces, X86ReducesRnd, SchedWriteFAdd, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info, - 0x27, X86GetMants, X86GetMantsRnd, WriteFAdd, HasAVX512>, + 0x27, X86GetMants, X86GetMantsRnd, SchedWriteFAdd, HasAVX512>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info, - 0x27, X86GetMants, X86GetMantsRnd, WriteFAdd, HasAVX512>, + 0x27, X86GetMants, X86GetMantsRnd, SchedWriteFAdd, HasAVX512>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; let Predicates = [HasAVX512] in { @@ -10520,33 +10534,32 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, } } -multiclass avx512_fixupimm_packed_all<X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _Vec> { +multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched, + AVX512VLVectorVTInfo _Vec> { let Predicates = [HasAVX512] in - defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched, + defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.ZMM, _Vec.info512>, - avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched, + avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched.ZMM, _Vec.info512>, AVX512AIi8Base, EVEX_4V, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched, + defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.XMM, _Vec.info128>, AVX512AIi8Base, EVEX_4V, EVEX_V128; - defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched, + defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.YMM, _Vec.info256>, AVX512AIi8Base, EVEX_4V, EVEX_V256; } } defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar, - WriteFAdd, f32x_info, v4i32x_info>, + SchedWriteFAdd.Scl, f32x_info, v4i32x_info>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar, - WriteFAdd, f64x_info, v2i64x_info>, + SchedWriteFAdd.Scl, f64x_info, v2i64x_info>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; -defm VFIXUPIMMPS : avx512_fixupimm_packed_all<WriteFAdd, avx512vl_f32_info>, +defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; -defm VFIXUPIMMPD : avx512_fixupimm_packed_all<WriteFAdd, avx512vl_f64_info>, +defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info>, EVEX_CD8<64, CD8VF>, VEX_W; - - // Patterns used to select SSE scalar fp arithmetic instructions from // either: // diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index f9774acb88a..91e69f123ee 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2508,99 +2508,99 @@ def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)), /// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those /// classes below multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, - SDNode OpNode, X86FoldableSchedWrite sched> { + SDNode OpNode, X86SchedWriteWidths sched> { let Predicates = [HasAVX, NoVLX] in { defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, v4f32, f128mem, loadv4f32, - SSEPackedSingle, sched, 0>, PS, VEX_4V, VEX_WIG; + SSEPackedSingle, sched.XMM, 0>, PS, VEX_4V, VEX_WIG; defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, v2f64, f128mem, loadv2f64, - SSEPackedDouble, sched, 0>, PD, VEX_4V, VEX_WIG; + SSEPackedDouble, sched.XMM, 0>, PD, VEX_4V, VEX_WIG; defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256, v8f32, f256mem, loadv8f32, - SSEPackedSingle, sched, 0>, PS, VEX_4V, VEX_L, VEX_WIG; + SSEPackedSingle, sched.YMM, 0>, PS, VEX_4V, VEX_L, VEX_WIG; defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256, v4f64, f256mem, loadv4f64, - SSEPackedDouble, sched, 0>, PD, VEX_4V, VEX_L, VEX_WIG; + SSEPackedDouble, sched.YMM, 0>, PD, VEX_4V, VEX_L, VEX_WIG; } let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, v4f32, f128mem, memopv4f32, SSEPackedSingle, - sched>, PS; + sched.XMM>, PS; defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, v2f64, f128mem, memopv2f64, SSEPackedDouble, - sched>, PD; + sched.XMM>, PD; } } multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched> { + X86SchedWriteWidths sched> { defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), - OpNode, FR32, f32mem, SSEPackedSingle, sched, 0>, + OpNode, FR32, f32mem, SSEPackedSingle, sched.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG; defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), - OpNode, FR64, f64mem, SSEPackedDouble, sched, 0>, + OpNode, FR64, f64mem, SSEPackedDouble, sched.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG; let Constraints = "$src1 = $dst" in { defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32, f32mem, SSEPackedSingle, - sched>, XS; + sched.Scl>, XS; defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64, f64mem, SSEPackedDouble, - sched>, XD; + sched.Scl>, XD; } } multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, - X86FoldableSchedWrite sched> { + X86SchedWriteWidths sched> { defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32, !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32, - SSEPackedSingle, sched, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG; + SSEPackedSingle, sched.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG; defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64, !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64, - SSEPackedDouble, sched, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG; + SSEPackedDouble, sched.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG; let Constraints = "$src1 = $dst" in { defm SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32, !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32, - SSEPackedSingle, sched>, XS; + SSEPackedSingle, sched.Scl>, XS; defm SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64, !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64, - SSEPackedDouble, sched>, XD; + SSEPackedDouble, sched.Scl>, XD; } } // Binary Arithmetic instructions -defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, WriteFAdd>, - basic_sse12_fp_binop_s<0x58, "add", fadd, WriteFAdd>, - basic_sse12_fp_binop_s_int<0x58, "add", null_frag, WriteFAdd>; -defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, WriteFMul>, - basic_sse12_fp_binop_s<0x59, "mul", fmul, WriteFMul>, - basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, WriteFMul>; +defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAdd>, + basic_sse12_fp_binop_s<0x58, "add", fadd, SchedWriteFAdd>, + basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAdd>; +defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SchedWriteFMul>, + basic_sse12_fp_binop_s<0x59, "mul", fmul, SchedWriteFMul>, + basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMul>; let isCommutable = 0 in { - defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, WriteFAdd>, - basic_sse12_fp_binop_s<0x5C, "sub", fsub, WriteFAdd>, - basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, WriteFAdd>; - defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, WriteFDiv>, - basic_sse12_fp_binop_s<0x5E, "div", fdiv, WriteFDiv>, - basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, WriteFDiv>; - defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, WriteFCmp>, - basic_sse12_fp_binop_s<0x5F, "max", X86fmax, WriteFCmp>, - basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, WriteFCmp>; - defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, WriteFCmp>, - basic_sse12_fp_binop_s<0x5D, "min", X86fmin, WriteFCmp>, - basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, WriteFCmp>; + defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SchedWriteFAdd>, + basic_sse12_fp_binop_s<0x5C, "sub", fsub, SchedWriteFAdd>, + basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAdd>; + defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SchedWriteFDiv>, + basic_sse12_fp_binop_s<0x5E, "div", fdiv, SchedWriteFDiv>, + basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDiv>; + defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmp>, + basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmp>, + basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmp>; + defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmp>, + basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmp>, + basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmp>; } let isCodeGenOnly = 1 in { - defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, WriteFCmp>, - basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, WriteFCmp>; - defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, WriteFCmp>, - basic_sse12_fp_binop_s<0x5D, "min", X86fminc, WriteFCmp>; + defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmp>, + basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmp>; + defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmp>, + basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmp>; } // Patterns used to select SSE scalar fp arithmetic instructions from @@ -4417,28 +4417,28 @@ multiclass sse3_addsub<string OpcodeStr, ValueType vt, RegisterClass RC, let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { defm VADDSUBPS : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem, - WriteFAdd, loadv4f32, 0>, XD, VEX_4V, - VEX_WIG; + SchedWriteFAdd.XMM, loadv4f32, 0>, + XD, VEX_4V, VEX_WIG; defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem, - WriteFAdd, loadv8f32, 0>, XD, VEX_4V, - VEX_L, VEX_WIG; + SchedWriteFAdd.YMM, loadv8f32, 0>, + XD, VEX_4V, VEX_L, VEX_WIG; } let ExeDomain = SSEPackedDouble in { defm VADDSUBPD : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem, - WriteFAdd, loadv2f64, 0>, PD, VEX_4V, - VEX_WIG; + SchedWriteFAdd.XMM, loadv2f64, 0>, + PD, VEX_4V, VEX_WIG; defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem, - WriteFAdd, loadv4f64, 0>, PD, VEX_4V, - VEX_L, VEX_WIG; + SchedWriteFAdd.YMM, loadv4f64, 0>, + PD, VEX_4V, VEX_L, VEX_WIG; } } let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in { let ExeDomain = SSEPackedSingle in - defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem, WriteFAdd, - memopv4f32>, XD; + defm ADDSUBPS : sse3_addsub<"addsubps", v4f32, VR128, f128mem, + SchedWriteFAdd.XMM, memopv4f32>, XD; let ExeDomain = SSEPackedDouble in - defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem, WriteFAdd, - memopv2f64>, PD; + defm ADDSUBPD : sse3_addsub<"addsubpd", v2f64, VR128, f128mem, + SchedWriteFAdd.XMM, memopv2f64>, PD; } //===---------------------------------------------------------------------===// @@ -5500,26 +5500,27 @@ let Predicates = [HasAVX, NoVLX] in { let ExeDomain = SSEPackedSingle in { // Intrinsic form defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32, - loadv4f32, X86VRndScale, WriteFAdd>, + loadv4f32, X86VRndScale, SchedWriteFAdd.XMM>, VEX, VEX_WIG; defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32, - loadv8f32, X86VRndScale, WriteFAdd>, + loadv8f32, X86VRndScale, SchedWriteFAdd.YMM>, VEX, VEX_L, VEX_WIG; } let ExeDomain = SSEPackedDouble in { defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64, - loadv2f64, X86VRndScale, WriteFAdd>, + loadv2f64, X86VRndScale, SchedWriteFAdd.XMM>, VEX, VEX_WIG; defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64, - loadv4f64, X86VRndScale, WriteFAdd>, + loadv4f64, X86VRndScale, SchedWriteFAdd.YMM>, VEX, VEX_L, VEX_WIG; } } let Predicates = [HasAVX, NoAVX512] in { - defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", WriteFAdd, v4f32, v2f64, - X86RndScales, 0>, VEX_4V, VEX_LIG, VEX_WIG; - defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", WriteFAdd>, + defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFAdd.Scl, + v4f32, v2f64, X86RndScales, 0>, + VEX_4V, VEX_LIG, VEX_WIG; + defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFAdd.Scl>, VEX_4V, VEX_LIG, VEX_WIG; } @@ -5594,15 +5595,15 @@ let Predicates = [HasAVX, NoVLX] in { let ExeDomain = SSEPackedSingle in defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32, - memopv4f32, X86VRndScale, WriteFAdd>; + memopv4f32, X86VRndScale, SchedWriteFAdd.XMM>; let ExeDomain = SSEPackedDouble in defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64, - memopv2f64, X86VRndScale, WriteFAdd>; + memopv2f64, X86VRndScale, SchedWriteFAdd.XMM>; -defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", WriteFAdd>; +defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFAdd.Scl>; let Constraints = "$src1 = $dst" in -defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", WriteFAdd, +defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFAdd.Scl, v4f32, v2f64, X86RndScales>; let Predicates = [UseSSE41] in { @@ -5996,15 +5997,15 @@ let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, VR128, loadv4f32, f128mem, 0, - WriteFAdd>, VEX_4V, VEX_WIG; + SchedWriteFAdd.XMM>, VEX_4V, VEX_WIG; let ExeDomain = SSEPackedDouble in defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, VR128, loadv2f64, f128mem, 0, - WriteFAdd>, VEX_4V, VEX_WIG; + SchedWriteFAdd.XMM>, VEX_4V, VEX_WIG; let ExeDomain = SSEPackedSingle in defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256, VR256, loadv8f32, i256mem, 0, - WriteFAdd>, VEX_4V, VEX_L, VEX_WIG; + SchedWriteFAdd.YMM>, VEX_4V, VEX_L, VEX_WIG; } let Predicates = [HasAVX2] in { @@ -6024,11 +6025,11 @@ let Constraints = "$src1 = $dst" in { let ExeDomain = SSEPackedSingle in defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, VR128, memopv4f32, f128mem, 1, - WriteFAdd>; + SchedWriteFAdd.XMM>; let ExeDomain = SSEPackedDouble in defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, VR128, memopv2f64, f128mem, 1, - WriteFAdd>; + SchedWriteFAdd.XMM>; } /// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index cf0e3db0b0d..a9ad9f7ae6f 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -208,6 +208,10 @@ def SchedWriteFMul : X86SchedWriteWidths<WriteFMul, WriteFMul, WriteFMul, WriteFMul>; def SchedWriteFDiv : X86SchedWriteWidths<WriteFDiv, WriteFDiv, WriteFDiv, WriteFDiv>; +def SchedWriteFRcp + : X86SchedWriteWidths<WriteFRcp, WriteFRcp, WriteFRcp, WriteFRcp>; +def SchedWriteFRsqrt + : X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrt, WriteFRsqrt, WriteFRsqrt>; def SchedWriteFLogic : X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicY>; |