diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 94 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 78 | ||||
-rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 63 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 83 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 71 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 49 | ||||
-rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 77 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 26 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleAtom.td | 26 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 68 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 18 |
12 files changed, 206 insertions, 453 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index ea1e4e25fc7..94065dec4a8 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4910,42 +4910,42 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, } multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode VecNode, X86FoldableSchedWrite sched, + SDNode VecNode, X86SchedWriteSizes sched, bit IsCommutable> { defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode, - sched, IsCommutable>, + sched.PS.Scl, IsCommutable>, avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode, - sched, IsCommutable>, + sched.PS.Scl, IsCommutable>, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode, - sched, IsCommutable>, + sched.PD.Scl, IsCommutable>, avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode, - sched, IsCommutable>, + sched.PD.Scl, IsCommutable>, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode VecNode, SDNode SaeNode, - X86FoldableSchedWrite sched, bit IsCommutable> { + X86SchedWriteSizes sched, bit IsCommutable> { defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode, - VecNode, SaeNode, sched, IsCommutable>, + VecNode, SaeNode, sched.PS.Scl, IsCommutable>, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode, - VecNode, SaeNode, sched, IsCommutable>, + VecNode, SaeNode, sched.PD.Scl, IsCommutable>, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, - SchedWriteFAdd.Scl, 1>; + SchedWriteFAddSizes, 1>; defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, - SchedWriteFMul.Scl, 1>; + SchedWriteFMulSizes, 1>; defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, - SchedWriteFAdd.Scl, 0>; + SchedWriteFAddSizes, 0>; defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, - SchedWriteFDiv.Scl, 0>; + SchedWriteFDivSizes, 0>; defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds, - SchedWriteFCmp.Scl, 0>; + SchedWriteFCmpSizes, 0>; defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds, - SchedWriteFCmp.Scl, 0>; + SchedWriteFCmpSizes, 0>; // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use // X86fminc and X86fmaxc instead of X86fmin and X86fmax @@ -5034,86 +5034,86 @@ multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, } multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, - Predicate prd, X86SchedWriteWidths sched, + Predicate prd, X86SchedWriteSizes sched, bit IsCommutable = 0> { let Predicates = [prd] in { defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info, - sched.ZMM, IsCommutable>, EVEX_V512, PS, + sched.PS.ZMM, IsCommutable>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info, - sched.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W, + sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; } // Define only if AVX512VL feature is present. let Predicates = [prd, HasVLX] in { defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info, - sched.XMM, IsCommutable>, EVEX_V128, PS, + sched.PS.XMM, IsCommutable>, EVEX_V128, PS, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info, - sched.YMM, IsCommutable>, EVEX_V256, PS, + sched.PS.YMM, IsCommutable>, EVEX_V256, PS, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info, - sched.XMM, IsCommutable>, EVEX_V128, PD, VEX_W, + sched.PD.XMM, IsCommutable>, EVEX_V128, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info, - sched.YMM, IsCommutable>, EVEX_V256, PD, VEX_W, + sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W, EVEX_CD8<64, CD8VF>; } } multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, - X86SchedWriteWidths sched> { - defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM, + X86SchedWriteSizes sched> { + defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM, + defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, v8f64_info>, EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, - X86SchedWriteWidths sched> { - defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM, + X86SchedWriteSizes sched> { + defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM, + defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, v8f64_info>, EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512, - SchedWriteFAdd, 1>, - avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAdd>; + SchedWriteFAddSizes, 1>, + avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>; defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512, - SchedWriteFMul, 1>, - avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMul>; + SchedWriteFMulSizes, 1>, + avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>; defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, - SchedWriteFAdd>, - avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAdd>; + SchedWriteFAddSizes>, + avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>; defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, - SchedWriteFDiv>, - avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDiv>; + SchedWriteFDivSizes>, + avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>; defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, - SchedWriteFCmp, 0>, - avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmp>; + SchedWriteFCmpSizes, 0>, + avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmpSizes>; defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, - SchedWriteFCmp, 0>, - avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmp>; + SchedWriteFCmpSizes, 0>, + avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmpSizes>; let isCodeGenOnly = 1 in { defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, - SchedWriteFCmp, 1>; + SchedWriteFCmpSizes, 1>; defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, - SchedWriteFCmp, 1>; + SchedWriteFCmpSizes, 1>; } defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, - SchedWriteFLogic, 1>; + SchedWriteFLogicSizes, 1>; defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, - SchedWriteFLogic, 0>; + SchedWriteFLogicSizes, 0>; defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, - SchedWriteFLogic, 1>; + SchedWriteFLogicSizes, 1>; defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, - SchedWriteFLogic, 1>; + SchedWriteFLogicSizes, 1>; // Patterns catch floating point selects with bitcasted integer logic ops. multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode, @@ -9960,9 +9960,9 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$sr //===----------------------------------------------------------------------===// defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512, - SchedWriteFShuffle>; + SchedWriteFShuffleSizes>; defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512, - SchedWriteFShuffle>; + SchedWriteFShuffleSizes>; defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl, SchedWriteShuffle, HasBWI>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 1ad7d4cdc81..4913bd486e4 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2510,99 +2510,99 @@ def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)), /// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those /// classes below multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, - SDNode OpNode, X86SchedWriteWidths sched> { + SDNode OpNode, X86SchedWriteSizes sched> { let Predicates = [HasAVX, NoVLX] in { defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, v4f32, f128mem, loadv4f32, - SSEPackedSingle, sched.XMM, 0>, PS, VEX_4V, VEX_WIG; + SSEPackedSingle, sched.PS.XMM, 0>, PS, VEX_4V, VEX_WIG; defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, v2f64, f128mem, loadv2f64, - SSEPackedDouble, sched.XMM, 0>, PD, VEX_4V, VEX_WIG; + SSEPackedDouble, sched.PD.XMM, 0>, PD, VEX_4V, VEX_WIG; defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256, v8f32, f256mem, loadv8f32, - SSEPackedSingle, sched.YMM, 0>, PS, VEX_4V, VEX_L, VEX_WIG; + SSEPackedSingle, sched.PS.YMM, 0>, PS, VEX_4V, VEX_L, VEX_WIG; defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256, v4f64, f256mem, loadv4f64, - SSEPackedDouble, sched.YMM, 0>, PD, VEX_4V, VEX_L, VEX_WIG; + SSEPackedDouble, sched.PD.YMM, 0>, PD, VEX_4V, VEX_L, VEX_WIG; } let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, v4f32, f128mem, memopv4f32, SSEPackedSingle, - sched.XMM>, PS; + sched.PS.XMM>, PS; defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, v2f64, f128mem, memopv2f64, SSEPackedDouble, - sched.XMM>, PD; + sched.PD.XMM>, PD; } } multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, - X86SchedWriteWidths sched> { + X86SchedWriteSizes sched> { defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), - OpNode, FR32, f32mem, SSEPackedSingle, sched.Scl, 0>, + OpNode, FR32, f32mem, SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG; defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), - OpNode, FR64, f64mem, SSEPackedDouble, sched.Scl, 0>, + OpNode, FR64, f64mem, SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG; let Constraints = "$src1 = $dst" in { defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32, f32mem, SSEPackedSingle, - sched.Scl>, XS; + sched.PS.Scl>, XS; defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64, f64mem, SSEPackedDouble, - sched.Scl>, XD; + sched.PD.Scl>, XD; } } multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, - X86SchedWriteWidths sched> { + X86SchedWriteSizes sched> { defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32, !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32, - SSEPackedSingle, sched.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG; + SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG; defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64, !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64, - SSEPackedDouble, sched.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG; + SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG; let Constraints = "$src1 = $dst" in { defm SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32, !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32, - SSEPackedSingle, sched.Scl>, XS; + SSEPackedSingle, sched.PS.Scl>, XS; defm SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64, !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64, - SSEPackedDouble, sched.Scl>, XD; + SSEPackedDouble, sched.PD.Scl>, XD; } } // Binary Arithmetic instructions -defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAdd>, - basic_sse12_fp_binop_s<0x58, "add", fadd, SchedWriteFAdd>, - basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAdd>; -defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SchedWriteFMul>, - basic_sse12_fp_binop_s<0x59, "mul", fmul, SchedWriteFMul>, - basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMul>; +defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SchedWriteFAddSizes>, + basic_sse12_fp_binop_s<0x58, "add", fadd, SchedWriteFAddSizes>, + basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SchedWriteFAddSizes>; +defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SchedWriteFMulSizes>, + basic_sse12_fp_binop_s<0x59, "mul", fmul, SchedWriteFMulSizes>, + basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SchedWriteFMulSizes>; let isCommutable = 0 in { - defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SchedWriteFAdd>, - basic_sse12_fp_binop_s<0x5C, "sub", fsub, SchedWriteFAdd>, - basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAdd>; - defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SchedWriteFDiv>, - basic_sse12_fp_binop_s<0x5E, "div", fdiv, SchedWriteFDiv>, - basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDiv>; - defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmp>, - basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmp>, - basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmp>; - defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmp>, - basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmp>, - basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmp>; + defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SchedWriteFAddSizes>, + basic_sse12_fp_binop_s<0x5C, "sub", fsub, SchedWriteFAddSizes>, + basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SchedWriteFAddSizes>; + defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SchedWriteFDivSizes>, + basic_sse12_fp_binop_s<0x5E, "div", fdiv, SchedWriteFDivSizes>, + basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SchedWriteFDivSizes>; + defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SchedWriteFCmpSizes>, + basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SchedWriteFCmpSizes>, + basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SchedWriteFCmpSizes>; + defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SchedWriteFCmpSizes>, + basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SchedWriteFCmpSizes>, + basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SchedWriteFCmpSizes>; } let isCodeGenOnly = 1 in { - defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmp>, - basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmp>; - defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmp>, - basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmp>; + defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>, + basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SchedWriteFCmpSizes>; + defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SchedWriteFCmpSizes>, + basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SchedWriteFCmpSizes>; } // Patterns used to select SSE scalar fp arithmetic instructions from diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 93adb100b76..936dd6e81a6 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -162,8 +162,15 @@ defm : BWWriteResPair<WriteFCmpY, [BWPort1], 3, [1], 1, 6>; // Floating point defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags. defm : BWWriteResPair<WriteFMul, [BWPort01], 3, [1], 1, 5>; // Floating point multiplication. defm : BWWriteResPair<WriteFMulY, [BWPort01], 3, [1], 1, 6>; // Floating point multiplication (YMM/ZMM). -defm : BWWriteResPair<WriteFDiv, [BWPort0], 12, [1], 1, 5>; // 10-14 cycles. // Floating point division. -defm : BWWriteResPair<WriteFDivY, [BWPort0], 12, [1], 1, 7>; // 10-14 cycles. // Floating point division (YMM/ZMM). + +//defm : BWWriteResPair<WriteFDiv, [BWPort0,BWFPDivider], 11, [1,3], 1, 5>; // Floating point division. +defm : BWWriteResPair<WriteFDivX, [BWPort0,BWFPDivider], 11, [1,5], 1, 5>; // Floating point division (XMM). +defm : BWWriteResPair<WriteFDivY, [BWPort0,BWPort015,BWFPDivider], 17, [2,1,10], 3, 6>; // Floating point division (YMM). +defm : BWWriteResPair<WriteFDivZ, [BWPort0,BWPort015,BWFPDivider], 17, [2,1,10], 3, 6>; // Floating point division (ZMM). +//defm : BWWriteResPair<WriteFDiv64, [BWPort0,BWFPDivider], 14, [1,8], 1, 5>; // Floating point division. +defm : BWWriteResPair<WriteFDiv64X, [BWPort0,BWFPDivider], 14, [1,8], 1, 5>; // Floating point division (XMM). +defm : BWWriteResPair<WriteFDiv64Y, [BWPort0,BWPort015,BWFPDivider], 23, [2,1,16], 3, 6>; // Floating point division (YMM). +defm : BWWriteResPair<WriteFDiv64Z, [BWPort0,BWPort015,BWFPDivider], 23, [2,1,16], 3, 6>; // Floating point division (ZMM). defm : X86WriteRes<WriteFSqrt, [BWPort0,BWFPDivider], 11, [1,4], 1>; // Floating point square root. defm : X86WriteRes<WriteFSqrtLd, [BWPort0,BWPort23,BWFPDivider], 16, [1,1,7], 2>; @@ -1394,19 +1401,12 @@ def BWWriteResGroup121 : SchedWriteRes<[BWPort1,BWPort23,BWPort06,BWPort0156]> { } def: InstRW<[BWWriteResGroup121], (instrs IMUL32m, MUL32m, MULX32rm)>; -def BWWriteResGroup122 : SchedWriteRes<[BWPort0,BWFPDivider]> { - let Latency = 11; - let NumMicroOps = 1; - let ResourceCycles = [1,5]; -} -def: InstRW<[BWWriteResGroup122], (instregex "(V?)DIVPSrr")>; - def BWWriteResGroup122_1 : SchedWriteRes<[BWPort0,BWFPDivider]> { let Latency = 11; let NumMicroOps = 1; let ResourceCycles = [1,3]; // Really 2.5 cycle throughput } -def: InstRW<[BWWriteResGroup122_1], (instregex "(V?)DIVSSrr")>; +def : SchedAlias<WriteFDiv, BWWriteResGroup122_1>; // TODO - convert to ZnWriteResFpuPair def BWWriteResGroup123 : SchedWriteRes<[BWPort0,BWPort23]> { let Latency = 11; @@ -1461,19 +1461,12 @@ def BWWriteResGroup135 : SchedWriteRes<[BWPort1,BWPort23]> { } def: InstRW<[BWWriteResGroup135], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>; -def BWWriteResGroup139 : SchedWriteRes<[BWPort0,BWFPDivider]> { - let Latency = 14; - let NumMicroOps = 1; - let ResourceCycles = [1,8]; -} -def: InstRW<[BWWriteResGroup139], (instregex "(V?)DIVPDrr")>; - def BWWriteResGroup139_1 : SchedWriteRes<[BWPort0,BWFPDivider]> { let Latency = 14; let NumMicroOps = 1; let ResourceCycles = [1,4]; } -def: InstRW<[BWWriteResGroup139_1], (instregex "(V?)DIVSDrr")>; +def : SchedAlias<WriteFDiv64, BWWriteResGroup139_1>; // TODO - convert to ZnWriteResFpuPair def BWWriteResGroup141 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> { let Latency = 14; @@ -1524,8 +1517,7 @@ def BWWriteResGroup150 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> { let NumMicroOps = 2; let ResourceCycles = [1,1,5]; } -def: InstRW<[BWWriteResGroup150], (instregex "(V?)DIVPSrm", - "(V?)DIVSSrm")>; +def : SchedAlias<WriteFDivLd, BWWriteResGroup150>; // TODO - convert to ZnWriteResFpuPair def BWWriteResGroup153 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPort15,BWPort0156]> { let Latency = 16; @@ -1541,13 +1533,6 @@ def BWWriteResGroup154 : SchedWriteRes<[BWPort5]> { } def: InstRW<[BWWriteResGroup154], (instrs VZEROALL)>; -def BWWriteResGroup155 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> { - let Latency = 17; - let NumMicroOps = 3; - let ResourceCycles = [2,1,10]; -} -def: InstRW<[BWWriteResGroup155], (instregex "VDIVPSYrr")>; - def BWWriteResGroup159 : SchedWriteRes<[BWPort5,BWPort6,BWPort06,BWPort0156]> { let Latency = 18; let NumMicroOps = 8; @@ -1568,8 +1553,7 @@ def BWWriteResGroup161 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> { let NumMicroOps = 2; let ResourceCycles = [1,1,8]; } -def: InstRW<[BWWriteResGroup161], (instregex "(V?)DIVPDrm", - "(V?)DIVSDrm")>; +def : SchedAlias<WriteFDiv64Ld, BWWriteResGroup161>; // TODO - convert to ZnWriteResFpuPair def BWWriteResGroup165 : SchedWriteRes<[BWPort0]> { let Latency = 20; @@ -1608,20 +1592,6 @@ def BWWriteResGroup172 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> { } def: InstRW<[BWWriteResGroup172], (instregex "POPF64")>; -def BWWriteResGroup173 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> { - let Latency = 23; - let NumMicroOps = 3; - let ResourceCycles = [2,1,16]; -} -def: InstRW<[BWWriteResGroup173], (instregex "VDIVPDYrr")>; - -def BWWriteResGroup174 : SchedWriteRes<[BWPort0,BWPort23,BWPort015,BWFPDivider]> { - let Latency = 23; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1,10]; -} -def: InstRW<[BWWriteResGroup174], (instregex "VDIVPSYrm")>; - def BWWriteResGroup176 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> { let Latency = 23; let NumMicroOps = 19; @@ -1650,13 +1620,6 @@ def BWWriteResGroup182 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> { } def: InstRW<[BWWriteResGroup182], (instregex "DIVR_FI(16|32)m")>; -def BWWriteResGroup183 : SchedWriteRes<[BWPort0,BWPort23,BWPort015,BWFPDivider]> { - let Latency = 29; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1,16]; -} -def: InstRW<[BWWriteResGroup183], (instregex "VDIVPDYrm")>; - def BWWriteResGroup183_1 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { let Latency = 22; let NumMicroOps = 7; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 2e2535eda30..b7de192b00e 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -156,8 +156,15 @@ defm : HWWriteResPair<WriteFCmpY, [HWPort1], 3, [1], 1, 7>; defm : HWWriteResPair<WriteFCom, [HWPort1], 3>; defm : HWWriteResPair<WriteFMul, [HWPort01], 5, [1], 1, 6>; defm : HWWriteResPair<WriteFMulY, [HWPort01], 5, [1], 1, 7>; -defm : HWWriteResPair<WriteFDiv, [HWPort0], 12, [1], 1, 5>; // 10-14 cycles. -defm : HWWriteResPair<WriteFDivY, [HWPort0], 12, [1], 1, 7>; // 10-14 cycles. + +defm : HWWriteResPair<WriteFDiv, [HWPort0,HWFPDivider], 13, [1,7], 1, 5>; +defm : HWWriteResPair<WriteFDivX, [HWPort0,HWFPDivider], 13, [1,7], 1, 6>; +defm : HWWriteResPair<WriteFDivY, [HWPort0,HWPort15,HWFPDivider], 21, [2,1,14], 3, 7>; +defm : HWWriteResPair<WriteFDivZ, [HWPort0,HWPort15,HWFPDivider], 21, [2,1,14], 3, 7>; +defm : HWWriteResPair<WriteFDiv64, [HWPort0,HWFPDivider], 20, [1,14], 1, 5>; +defm : HWWriteResPair<WriteFDiv64X, [HWPort0,HWFPDivider], 20, [1,14], 1, 6>; +defm : HWWriteResPair<WriteFDiv64Y, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28], 3, 7>; +defm : HWWriteResPair<WriteFDiv64Z, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28], 3, 7>; defm : HWWriteResPair<WriteFRcp, [HWPort0], 5, [1], 1, 5>; defm : HWWriteResPair<WriteFRcpX, [HWPort0], 5, [1], 1, 6>; @@ -1652,13 +1659,6 @@ def: InstRW<[HWWriteResGroup89], (instregex "(V?)PCMPGTQ(Y?)rr", "MUL_FST0r", "MUL_FrST0")>; -def HWWriteResGroup91_4 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { - let Latency = 18; - let NumMicroOps = 2; - let ResourceCycles = [1,1,7]; -} -def: InstRW<[HWWriteResGroup91_4], (instregex "(V?)DIVSSrm")>; - def HWWriteResGroup91_2 : SchedWriteRes<[HWPort0,HWPort23]> { let Latency = 11; let NumMicroOps = 2; @@ -1828,14 +1828,6 @@ def HWWriteResGroup120 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPo } def: InstRW<[HWWriteResGroup120], (instregex "RCL(8|16|32|64)mCL")>; -def HWWriteResGroup121 : SchedWriteRes<[HWPort0,HWFPDivider]> { - let Latency = 13; - let NumMicroOps = 1; - let ResourceCycles = [1,7]; -} -def: InstRW<[HWWriteResGroup121], (instregex "(V?)DIVPSrr", - "(V?)DIVSSrr")>; - def HWWriteResGroup129 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> { let Latency = 11; let NumMicroOps = 7; @@ -1865,13 +1857,6 @@ def HWWriteResGroup132 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06,HWPo } def: InstRW<[HWWriteResGroup132], (instregex "CMPXCHG8B")>; -def HWWriteResGroup134 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { - let Latency = 19; - let NumMicroOps = 2; - let ResourceCycles = [1,1,7]; -} -def: InstRW<[HWWriteResGroup134], (instregex "(V?)DIVPSrm")>; - def HWWriteResGroup135 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> { let Latency = 19; let NumMicroOps = 11; @@ -1945,14 +1930,6 @@ def: InstRW<[HWWriteResGroup154], (instregex "DIV_FPrST0", "DIV_FST0r", "DIV_FrST0")>; -def HWWriteResGroup154_1 : SchedWriteRes<[HWPort0,HWFPDivider]> { - let Latency = 20; - let NumMicroOps = 1; - let ResourceCycles = [1,14]; -} -def: InstRW<[HWWriteResGroup154_1], (instregex "(V?)DIVPDrr", - "(V?)DIVSDrr")>; - def HWWriteResGroup155 : SchedWriteRes<[HWPort0,HWPort23]> { let Latency = 27; let NumMicroOps = 2; @@ -1960,20 +1937,6 @@ def HWWriteResGroup155 : SchedWriteRes<[HWPort0,HWPort23]> { } def: InstRW<[HWWriteResGroup155], (instregex "DIVR_F(32|64)m")>; -def HWWriteResGroup155_1 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { - let Latency = 26; - let NumMicroOps = 2; - let ResourceCycles = [1,1,14]; -} -def: InstRW<[HWWriteResGroup155_1], (instregex "(V?)DIVPDrm")>; - -def HWWriteResGroup155_4 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { - let Latency = 25; - let NumMicroOps = 2; - let ResourceCycles = [1,1,14]; -} -def: InstRW<[HWWriteResGroup155_4], (instregex "(V?)DIVSDrm")>; - def HWWriteResGroup156 : SchedWriteRes<[HWPort5,HWPort6,HWPort0156]> { let Latency = 20; let NumMicroOps = 10; @@ -1981,20 +1944,6 @@ def HWWriteResGroup156 : SchedWriteRes<[HWPort5,HWPort6,HWPort0156]> { } def: InstRW<[HWWriteResGroup156], (instregex "MWAITrr")>; -def HWWriteResGroup159 : SchedWriteRes<[HWPort0,HWPort15,HWFPDivider]> { - let Latency = 21; - let NumMicroOps = 3; - let ResourceCycles = [2,1,14]; -} -def: InstRW<[HWWriteResGroup159], (instregex "VDIVPSYrr")>; - -def HWWriteResGroup160 : SchedWriteRes<[HWPort0,HWPort23,HWPort15,HWFPDivider]> { - let Latency = 28; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1,14]; -} -def: InstRW<[HWWriteResGroup160], (instregex "VDIVPSYrm")>; - def HWWriteResGroup161 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> { let Latency = 30; let NumMicroOps = 3; @@ -2055,20 +2004,6 @@ def HWWriteResGroup171 : SchedWriteRes<[HWPort5,HWPort6,HWPort23,HWPort237,HWPor def: InstRW<[HWWriteResGroup171], (instregex "OUT(8|16|32)ir", "OUT(8|16|32)rr")>; -def HWWriteResGroup173 : SchedWriteRes<[HWPort0,HWPort15,HWFPDivider]> { - let Latency = 35; - let NumMicroOps = 3; - let ResourceCycles = [2,1,28]; -} -def: InstRW<[HWWriteResGroup173], (instregex "VDIVPDYrr")>; - -def HWWriteResGroup174 : SchedWriteRes<[HWPort0,HWPort23,HWPort15,HWFPDivider]> { - let Latency = 42; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1,28]; -} -def: InstRW<[HWWriteResGroup174], (instregex "VDIVPDYrm")>; - def HWWriteResGroup175 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort6,HWPort23,HWPort237,HWPort15,HWPort0156]> { let Latency = 41; let NumMicroOps = 18; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index c3ef44b4ee8..fcd5fb93f65 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -146,8 +146,15 @@ defm : SBWriteResPair<WriteFCmpY, [SBPort1], 3, [1], 1, 7>; defm : SBWriteResPair<WriteFCom, [SBPort1], 3>; defm : SBWriteResPair<WriteFMul, [SBPort0], 5, [1], 1, 6>; defm : SBWriteResPair<WriteFMulY, [SBPort0], 5, [1], 1, 7>; -defm : SBWriteResPair<WriteFDiv, [SBPort0], 24, [1], 1, 5>; -defm : SBWriteResPair<WriteFDivY, [SBPort0], 24, [1], 1, 7>; + +defm : SBWriteResPair<WriteFDiv, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>; +defm : SBWriteResPair<WriteFDivX, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>; +defm : SBWriteResPair<WriteFDivY, [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>; +defm : SBWriteResPair<WriteFDivZ, [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>; +defm : SBWriteResPair<WriteFDiv64, [SBPort0,SBFPDivider], 22, [1,22], 1, 6>; +defm : SBWriteResPair<WriteFDiv64X, [SBPort0,SBFPDivider], 22, [1,22], 1, 6>; +defm : SBWriteResPair<WriteFDiv64Y, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>; +defm : SBWriteResPair<WriteFDiv64Z, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>; defm : SBWriteResPair<WriteFRcp, [SBPort0], 5, [1], 1, 6>; defm : SBWriteResPair<WriteFRcpX, [SBPort0], 5, [1], 1, 6>; @@ -1361,14 +1368,6 @@ def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> { } def: InstRW<[SBWriteResGroup114], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>; -def SBWriteResGroup116 : SchedWriteRes<[SBPort0,SBFPDivider]> { - let Latency = 14; - let NumMicroOps = 1; - let ResourceCycles = [1,14]; -} -def: InstRW<[SBWriteResGroup116], (instregex "(V?)DIVPSrr", - "(V?)DIVSSrr")>; - def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { let Latency = 15; let NumMicroOps = 3; @@ -1376,37 +1375,6 @@ def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { } def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI(16|32)m")>; -def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23,SBFPDivider]> { - let Latency = 20; - let NumMicroOps = 2; - let ResourceCycles = [1,1,14]; -} -def: InstRW<[SBWriteResGroup123], (instregex "(V?)DIVPSrm", - "(V?)DIVSSrm")>; - -def SBWriteResGroup126 : SchedWriteRes<[SBPort0,SBFPDivider]> { - let Latency = 22; - let NumMicroOps = 1; - let ResourceCycles = [1,22]; -} -def: InstRW<[SBWriteResGroup126], (instregex "(V?)DIVPDrr", - "(V?)DIVSDrr")>; - -def SBWriteResGroup128 : SchedWriteRes<[SBPort0,SBPort23,SBFPDivider]> { - let Latency = 28; - let NumMicroOps = 2; - let ResourceCycles = [1,1,22]; -} -def: InstRW<[SBWriteResGroup128], (instregex "(V?)DIVPDrm", - "(V?)DIVSDrm")>; - -def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort05,SBFPDivider]> { - let Latency = 29; - let NumMicroOps = 3; - let ResourceCycles = [2,1,28]; -} -def: InstRW<[SBWriteResGroup129], (instregex "VDIVPSYrr")>; - def SBWriteResGroup130 : SchedWriteRes<[SBPort0,SBPort23]> { let Latency = 31; let NumMicroOps = 2; @@ -1421,25 +1389,4 @@ def SBWriteResGroup131 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { } def: InstRW<[SBWriteResGroup131], (instregex "DIV(R?)_FI(16|32)m")>; -def SBWriteResGroup132 : SchedWriteRes<[SBPort0,SBPort23,SBPort05,SBFPDivider]> { - let Latency = 36; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1,28]; -} -def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm")>; - -def SBWriteResGroup133 : SchedWriteRes<[SBPort0,SBPort05,SBFPDivider]> { - let Latency = 45; - let NumMicroOps = 3; - let ResourceCycles = [2,1,44]; -} -def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr")>; - -def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort05,SBFPDivider]> { - let Latency = 52; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1,44]; -} -def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm")>; - } // SchedModel diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 077eeed90d4..9875ce32236 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -159,8 +159,15 @@ defm : SKLWriteResPair<WriteFCmpY, [SKLPort01], 4, [1], 1, 7>; // Floating poin defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags. defm : SKLWriteResPair<WriteFMul, [SKLPort01], 4, [1], 1, 6>; // Floating point multiplication. defm : SKLWriteResPair<WriteFMulY, [SKLPort01], 4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM). -defm : SKLWriteResPair<WriteFDiv, [SKLPort0], 12, [1], 1, 5>; // 10-14 cycles. // Floating point division. -defm : SKLWriteResPair<WriteFDivY, [SKLPort0], 12, [1], 1, 7>; // 10-14 cycles. // Floating point division (YMM/ZMM). + +defm : SKLWriteResPair<WriteFDiv, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 5>; // Floating point division. +//defm : SKLWriteResPair<WriteFDivX, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 6>; // Floating point division (XMM). +defm : SKLWriteResPair<WriteFDivY, [SKLPort0,SKLFPDivider], 11, [1,5], 1, 7>; // Floating point division (YMM). +defm : SKLWriteResPair<WriteFDivZ, [SKLPort0,SKLFPDivider], 11, [1,5], 1, 7>; // Floating point division (ZMM). +//defm : SKLWriteResPair<WriteFDiv64, [SKLPort0,SKLFPDivider], 14, [1,3], 1, 5>; // Floating point double division. +//defm : SKLWriteResPair<WriteFDiv64X, [SKLPort0,SKLFPDivider], 14, [1,3], 1, 6>; // Floating point double division (XMM). +//defm : SKLWriteResPair<WriteFDiv64Y, [SKLPort0,SKLFPDivider], 14, [1,5], 1, 7>; // Floating point double division (YMM). +defm : SKLWriteResPair<WriteFDiv64Z, [SKLPort0,SKLFPDivider], 14, [1,5], 1, 7>; // Floating point double division (ZMM). defm : SKLWriteResPair<WriteFSqrt, [SKLPort0,SKLFPDivider], 12, [1,3], 1, 5>; // Floating point square root. defm : SKLWriteResPair<WriteFSqrtX, [SKLPort0,SKLFPDivider], 12, [1,3], 1, 6>; // Floating point square root (XMM). @@ -1611,15 +1618,7 @@ def SKLWriteResGroup145 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { let NumMicroOps = 1; let ResourceCycles = [1,3]; } -def: InstRW<[SKLWriteResGroup145], (instregex "(V?)DIVPSrr", - "(V?)DIVSSrr")>; - -def SKLWriteResGroup145_1 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { - let Latency = 11; - let NumMicroOps = 1; - let ResourceCycles = [1,5]; -} -def: InstRW<[SKLWriteResGroup145_1], (instregex "VDIVPSYrr")>; +def : SchedAlias<WriteFDivX, SKLWriteResGroup145>; // TODO - convert to ZnWriteResFpuPair def SKLWriteResGroup146 : SchedWriteRes<[SKLPort0,SKLPort23]> { let Latency = 11; @@ -1736,15 +1735,15 @@ def SKLWriteResGroup166 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { let NumMicroOps = 1; let ResourceCycles = [1,3]; } -def: InstRW<[SKLWriteResGroup166], (instregex "(V?)DIVPDrr", - "(V?)DIVSDrr")>; +def : SchedAlias<WriteFDiv64, SKLWriteResGroup166>; // TODO - convert to ZnWriteResFpuPair +def : SchedAlias<WriteFDiv64X, SKLWriteResGroup166>; // TODO - convert to ZnWriteResFpuPair def SKLWriteResGroup166_1 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { let Latency = 14; let NumMicroOps = 1; let ResourceCycles = [1,5]; } -def: InstRW<[SKLWriteResGroup166_1], (instregex "VDIVPDYrr")>; +def : SchedAlias<WriteFDiv64Y, SKLWriteResGroup166_1>; // TODO - convert to ZnWriteResFpuPair def SKLWriteResGroup169 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { let Latency = 14; @@ -1776,13 +1775,6 @@ def SKLWriteResGroup174 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06 } def: InstRW<[SKLWriteResGroup174], (instregex "RCL(8|16|32|64)mCL")>; -def SKLWriteResGroup175 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { - let Latency = 16; - let NumMicroOps = 2; - let ResourceCycles = [1,1,3]; -} -def: InstRW<[SKLWriteResGroup175], (instregex "(V?)DIVSSrm")>; - def SKLWriteResGroup177 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> { let Latency = 16; let NumMicroOps = 14; @@ -1802,7 +1794,7 @@ def SKLWriteResGroup179 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { let NumMicroOps = 2; let ResourceCycles = [1,1,5]; } -def: InstRW<[SKLWriteResGroup179], (instregex "(V?)DIVPSrm")>; +def : SchedAlias<WriteFDivXLd, SKLWriteResGroup179>; // TODO - convert to ZnWriteResFpuPair def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> { let Latency = 17; @@ -1811,13 +1803,6 @@ def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKL } def: InstRW<[SKLWriteResGroup180], (instrs XCH_F)>; -def SKLWriteResGroup182 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { - let Latency = 18; - let NumMicroOps = 2; - let ResourceCycles = [1,1,5]; -} -def: InstRW<[SKLWriteResGroup182], (instregex "VDIVPSYrm")>; - def SKLWriteResGroup184 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> { let Latency = 18; let NumMicroOps = 8; @@ -1837,7 +1822,7 @@ def SKLWriteResGroup186 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { let NumMicroOps = 2; let ResourceCycles = [1,1,4]; } -def: InstRW<[SKLWriteResGroup186], (instregex "(V?)DIVSDrm")>; +def : SchedAlias<WriteFDiv64Ld, SKLWriteResGroup186>; // TODO - convert to ZnWriteResFpuPair def SKLWriteResGroup189 : SchedWriteRes<[SKLPort0]> { let Latency = 20; @@ -1853,7 +1838,7 @@ def SKLWriteResGroup190 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { let NumMicroOps = 2; let ResourceCycles = [1,1,4]; } -def: InstRW<[SKLWriteResGroup190], (instregex "(V?)DIVPDrm")>; +def : SchedAlias<WriteFDiv64XLd, SKLWriteResGroup190>; // TODO - convert to ZnWriteResFpuPair def SKLWriteResGroup192 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { let Latency = 20; @@ -1874,7 +1859,7 @@ def SKLWriteResGroup195 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { let NumMicroOps = 2; let ResourceCycles = [1,1,8]; } -def: InstRW<[SKLWriteResGroup195], (instregex "VDIVPDYrm")>; +def : SchedAlias<WriteFDiv64YLd, SKLWriteResGroup195>; // TODO - convert to ZnWriteResFpuPair def SKLWriteResGroup196 : SchedWriteRes<[SKLPort0,SKLPort23]> { let Latency = 22; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index bd58687884e..7bff6d0844a 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -159,8 +159,15 @@ defm : SKXWriteResPair<WriteFCmpY,[SKXPort015], 4, [1], 1, 7>; // Floating poin defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags. defm : SKXWriteResPair<WriteFMul, [SKXPort015], 4, [1], 1, 6>; // Floating point multiplication. defm : SKXWriteResPair<WriteFMulY,[SKXPort015], 4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM). -defm : SKXWriteResPair<WriteFDiv, [SKXPort0], 12, [1], 1, 5>; // 10-14 cycles. // Floating point division. -defm : SKXWriteResPair<WriteFDivY, [SKXPort0], 12, [1], 1, 7>; // 10-14 cycles. // Floating point division (YMM/ZMM). + +defm : SKXWriteResPair<WriteFDiv, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 5>; // 10-14 cycles. // Floating point division. +//defm : SKXWriteResPair<WriteFDivX, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles. // Floating point division (XMM). +defm : SKXWriteResPair<WriteFDivY, [SKXPort0,SKXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles. // Floating point division (YMM). +defm : SKXWriteResPair<WriteFDivZ, [SKXPort0,SKXPort5,SKXFPDivider], 18, [2,1,10], 3, 7>; // 10-14 cycles. // Floating point division (ZMM). +//defm : SKXWriteResPair<WriteFDiv64, [SKXPort0,SKXFPDivider], 14, [1,3], 1, 5>; // 10-14 cycles. // Floating point division. +//defm : SKXWriteResPair<WriteFDiv64X, [SKXPort0,SKXFPDivider], 14, [1,3], 1, 6>; // 10-14 cycles. // Floating point division (XMM). +//defm : SKXWriteResPair<WriteFDiv64Y, [SKXPort0,SKXFPDivider], 14, [1,5], 1, 7>; // 10-14 cycles. // Floating point division (YMM). +defm : SKXWriteResPair<WriteFDiv64Z, [SKXPort0,SKXPort5,SKXFPDivider], 23, [2,1,16], 3, 7>; // 10-14 cycles. // Floating point division (ZMM). defm : SKXWriteResPair<WriteFSqrt, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 5>; // Floating point square root. defm : SKXWriteResPair<WriteFSqrtX, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 6>; // Floating point square root (XMM). @@ -2708,15 +2715,7 @@ def SKXWriteResGroup159 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { let NumMicroOps = 1; let ResourceCycles = [1,3]; } -def: InstRW<[SKXWriteResGroup159], (instregex "(V?)DIVPS(Z128)?rr", - "(V?)DIVSS(Z?)rr")>; - -def SKXWriteResGroup159_1 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { - let Latency = 11; - let NumMicroOps = 1; - let ResourceCycles = [1,5]; -} -def: InstRW<[SKXWriteResGroup159_1], (instregex "VDIVPS(Y|Z256)rr")>; +def : SchedAlias<WriteFDivX, SKXWriteResGroup159>; // TODO - convert to ZnWriteResFpuPair def SKXWriteResGroup160 : SchedWriteRes<[SKXPort0,SKXPort23]> { let Latency = 11; @@ -2958,15 +2957,15 @@ def SKXWriteResGroup184 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { let NumMicroOps = 1; let ResourceCycles = [1,3]; } -def: InstRW<[SKXWriteResGroup184], (instregex "(V?)DIVPDrr", - "(V?)DIVSD(Z?)rr")>; +def : SchedAlias<WriteFDiv64, SKXWriteResGroup184>; // TODO - convert to ZnWriteResFpuPair +def : SchedAlias<WriteFDiv64X, SKXWriteResGroup184>; // TODO - convert to ZnWriteResFpuPair def SKXWriteResGroup184_1 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { let Latency = 14; let NumMicroOps = 1; let ResourceCycles = [1,5]; } -def: InstRW<[SKXWriteResGroup184_1], (instregex "VDIVPD(Y|Z256)rr")>; +def : SchedAlias<WriteFDiv64Y, SKXWriteResGroup184_1>; // TODO - convert to ZnWriteResFpuPair def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { let Latency = 14; @@ -3028,13 +3027,6 @@ def SKXWriteResGroup195 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06 } def: InstRW<[SKXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>; -def SKXWriteResGroup196 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { - let Latency = 16; - let NumMicroOps = 2; - let ResourceCycles = [1,1,3]; -} -def: InstRW<[SKXWriteResGroup196], (instregex "(V?)DIVSS(Z?)rm")>; - def SKXWriteResGroup198 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { let Latency = 16; let NumMicroOps = 4; @@ -3064,7 +3056,7 @@ def SKXWriteResGroup201 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { let NumMicroOps = 2; let ResourceCycles = [1,1,5]; } -def: InstRW<[SKXWriteResGroup201], (instregex "(V?)DIVPS(Z128)?rm")>; +def : SchedAlias<WriteFDivXLd, SKXWriteResGroup201>; // TODO - convert to ZnWriteResFpuPair def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> { let Latency = 17; @@ -3073,13 +3065,6 @@ def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKX } def: InstRW<[SKXWriteResGroup202], (instrs XCH_F)>; -def SKXWriteResGroup204 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { - let Latency = 18; - let NumMicroOps = 2; - let ResourceCycles = [1,1,5]; -} -def: InstRW<[SKXWriteResGroup204], (instregex "VDIVPS(Y|Z256)rm")>; - def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort015]> { let Latency = 18; let NumMicroOps = 4; @@ -3106,7 +3091,7 @@ def SKXWriteResGroup209 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { let NumMicroOps = 2; let ResourceCycles = [1,1,4]; } -def: InstRW<[SKXWriteResGroup209], (instregex "(V?)DIVSD(Z?)rm")>; +def : SchedAlias<WriteFDiv64Ld, SKXWriteResGroup209>; // TODO - convert to ZnWriteResFpuPair def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort015]> { let Latency = 19; @@ -3138,7 +3123,7 @@ def SKXWriteResGroup216 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { let NumMicroOps = 2; let ResourceCycles = [1,1,4]; } -def: InstRW<[SKXWriteResGroup216], (instregex "(V?)DIVPD(Z128)?rm")>; +def : SchedAlias<WriteFDiv64XLd, SKXWriteResGroup216>; // TODO - convert to ZnWriteResFpuPair def SKXWriteResGroup218 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { let Latency = 20; @@ -3169,7 +3154,7 @@ def SKXWriteResGroup222 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { let NumMicroOps = 2; let ResourceCycles = [1,1,8]; } -def: InstRW<[SKXWriteResGroup222], (instregex "VDIVPD(Y|Z256)rm")>; +def : SchedAlias<WriteFDiv64YLd, SKXWriteResGroup222>; // TODO - convert to ZnWriteResFpuPair def SKXWriteResGroup223 : SchedWriteRes<[SKXPort0,SKXPort23]> { let Latency = 22; @@ -3238,20 +3223,6 @@ def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> { def: InstRW<[SKXWriteResGroup225], (instregex "VPCONFLICTDZ128rr", "VPCONFLICTQZ256rr")>; -def SKXWriteResGroup227 : SchedWriteRes<[SKXPort0,SKXPort5,SKXFPDivider]> { - let Latency = 23; - let NumMicroOps = 3; - let ResourceCycles = [2,1,16]; -} -def: InstRW<[SKXWriteResGroup227], (instregex "VDIVPDZrr")>; - -def SKXWriteResGroup227_1 : SchedWriteRes<[SKXPort0,SKXPort5,SKXFPDivider]> { - let Latency = 18; - let NumMicroOps = 3; - let ResourceCycles = [2,1,10]; -} -def: InstRW<[SKXWriteResGroup227_1], (instregex "VDIVPSZrr")>; - def SKXWriteResGroup228 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { let Latency = 23; let NumMicroOps = 19; @@ -3259,13 +3230,6 @@ def SKXWriteResGroup228 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort23,SK } def: InstRW<[SKXWriteResGroup228], (instregex "CMPXCHG16B")>; -def SKXWriteResGroup230 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXFPDivider]> { - let Latency = 25; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1,10]; -} -def: InstRW<[SKXWriteResGroup230], (instregex "VDIVPSZrm(b?)")>; - def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { let Latency = 25; let NumMicroOps = 3; @@ -3330,13 +3294,6 @@ def SKXWriteResGroup243 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { } def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>; -def SKXWriteResGroup244 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort5,SKXFPDivider]> { - let Latency = 30; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1,16]; -} -def: InstRW<[SKXWriteResGroup244], (instregex "VDIVPDZrm(b?)")>; - def SKXWriteResGroup245 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { let Latency = 30; let NumMicroOps = 5; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index cc933c80eef..22c67194073 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -115,8 +115,14 @@ defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM/ZMM). defm WriteFCom : X86SchedWritePair; // Floating point compare to flags. defm WriteFMul : X86SchedWritePair; // Floating point multiplication. defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM/ZMM). -defm WriteFDiv : X86SchedWritePair; // Floating point division. -defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM/ZMM). +defm WriteFDiv : X86SchedWritePair; // Floating point division. +defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM). +defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM). +defm WriteFDivZ : X86SchedWritePair; // Floating point division (ZMM). +defm WriteFDiv64 : X86SchedWritePair; // Floating point division. +defm WriteFDiv64X : X86SchedWritePair; // Floating point division (XMM). +defm WriteFDiv64Y : X86SchedWritePair; // Floating point division (YMM). +defm WriteFDiv64Z : X86SchedWritePair; // Floating point division (ZMM). defm WriteFSqrt : X86SchedWritePair; // Floating point square root. defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM). defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM). @@ -268,6 +274,8 @@ def SchedWriteFCmp : X86SchedWriteWidths<WriteFCmp, WriteFCmp, WriteFCmpY, WriteFCmpY>; def SchedWriteFMul : X86SchedWriteWidths<WriteFMul, WriteFMul, WriteFMulY, WriteFMulY>; +def SchedWriteFMul64 + : X86SchedWriteWidths<WriteFMul, WriteFMul, WriteFMulY, WriteFMulY>; def SchedWriteFMA : X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAY>; def SchedWriteDPPD @@ -275,7 +283,9 @@ def SchedWriteDPPD def SchedWriteDPPS : X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSY>; def SchedWriteFDiv - : X86SchedWriteWidths<WriteFDiv, WriteFDiv, WriteFDivY, WriteFDivY>; + : X86SchedWriteWidths<WriteFDiv, WriteFDivX, WriteFDivY, WriteFDivZ>; +def SchedWriteFDiv64 + : X86SchedWriteWidths<WriteFDiv64, WriteFDiv64X, WriteFDiv64Y, WriteFDiv64Z>; def SchedWriteFSqrt : X86SchedWriteWidths<WriteFSqrt, WriteFSqrtX, WriteFSqrtY, WriteFSqrtZ>; @@ -347,12 +357,18 @@ def SchedWriteVarBlend // Vector size wrappers. def SchedWriteFAddSizes : X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd>; +def SchedWriteFCmpSizes + : X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp>; def SchedWriteFMulSizes - : X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul>; + : X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul64>; def SchedWriteFDivSizes - : X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv>; + : X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv64>; def SchedWriteFSqrtSizes : X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt64>; +def SchedWriteFLogicSizes + : X86SchedWriteSizes<SchedWriteFLogic, SchedWriteFLogic>; +def SchedWriteFShuffleSizes + : X86SchedWriteSizes<SchedWriteFShuffle, SchedWriteFShuffle>; //===----------------------------------------------------------------------===// // Generic Processor Scheduler Models. diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 8ffa9e67400..77c8ae7ae63 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -217,7 +217,13 @@ defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, defm : AtomWriteResPair<WriteFRsqrtX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>; defm : AtomWriteResPair<WriteFRsqrtY, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>; defm : AtomWriteResPair<WriteFDiv, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>; -defm : AtomWriteResPair<WriteFDivY, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>; +defm : AtomWriteResPair<WriteFDivX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>; +defm : AtomWriteResPair<WriteFDivY, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>; +defm : AtomWriteResPair<WriteFDivZ, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>; +defm : AtomWriteResPair<WriteFDiv64, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>; +defm : AtomWriteResPair<WriteFDiv64X, [AtomPort01], [AtomPort01],125,125,[125],[125]>; +defm : AtomWriteResPair<WriteFDiv64Y, [AtomPort01], [AtomPort01],125,125,[125],[125]>; +defm : AtomWriteResPair<WriteFDiv64Z, [AtomPort01], [AtomPort01],125,125,[125],[125]>; defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>; defm : AtomWriteResPair<WriteFSqrtX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>; defm : AtomWriteResPair<WriteFSqrtY, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>; @@ -702,12 +708,6 @@ def AtomWrite01_59 : SchedWriteRes<[AtomPort01]> { } def : InstRW<[AtomWrite01_59], (instrs INSB, INSL, INSW)>; -def AtomWrite01_62 : SchedWriteRes<[AtomPort01]> { - let Latency = 62; - let ResourceCycles = [62]; -} -def : InstRW<[AtomWrite01_62], (instregex "DIVSD(r|m)(_Int)?")>; - def AtomWrite01_63 : SchedWriteRes<[AtomPort01]> { let Latency = 63; let ResourceCycles = [63]; @@ -720,12 +720,6 @@ def AtomWrite01_68 : SchedWriteRes<[AtomPort01]> { } def : InstRW<[AtomWrite01_68], (instrs OUT8rr, OUT16rr, OUT32rr)>; -def AtomWrite01_70 : SchedWriteRes<[AtomPort01]> { - let Latency = 70; - let ResourceCycles = [70]; -} -def : InstRW<[AtomWrite01_70], (instrs DIVPSrr, DIVPSrm)>; - def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> { let Latency = 71; let ResourceCycles = [71]; @@ -788,12 +782,6 @@ def AtomWrite01_121 : SchedWriteRes<[AtomPort01]> { } def : InstRW<[AtomWrite01_121], (instrs CPUID)>; -def AtomWrite01_125 : SchedWriteRes<[AtomPort01]> { - let Latency = 125; - let ResourceCycles = [125]; -} -def : InstRW<[AtomWrite01_125], (instrs DIVPDrr, DIVPDrm)>; - def AtomWrite01_127 : SchedWriteRes<[AtomPort01]> { let Latency = 127; let ResourceCycles = [127]; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 31e26b4579b..685ea3b4743 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -336,7 +336,13 @@ defm : JWriteResFpuPair<WriteFRsqrt, [JFPU1, JFPM], 2>; defm : JWriteResFpuPair<WriteFRsqrtX, [JFPU1, JFPM], 2>; defm : JWriteResYMMPair<WriteFRsqrtY, [JFPU1, JFPM], 2, [2,2], 2>; defm : JWriteResFpuPair<WriteFDiv, [JFPU1, JFPM], 19, [1, 19]>; +defm : JWriteResFpuPair<WriteFDivX, [JFPU1, JFPM], 19, [1, 19]>; defm : JWriteResYMMPair<WriteFDivY, [JFPU1, JFPM], 38, [2, 38], 2>; +defm : JWriteResYMMPair<WriteFDivZ, [JFPU1, JFPM], 38, [2, 38], 2>; +defm : JWriteResFpuPair<WriteFDiv64, [JFPU1, JFPM], 19, [1, 19]>; +defm : JWriteResFpuPair<WriteFDiv64X, [JFPU1, JFPM], 19, [1, 19]>; +defm : JWriteResYMMPair<WriteFDiv64Y, [JFPU1, JFPM], 38, [2, 38], 2>; +defm : JWriteResYMMPair<WriteFDiv64Z, [JFPU1, JFPM], 38, [2, 38], 2>; defm : JWriteResFpuPair<WriteFSqrt, [JFPU1, JFPM], 21, [1, 21]>; defm : JWriteResFpuPair<WriteFSqrtX, [JFPU1, JFPM], 21, [1, 21]>; defm : JWriteResYMMPair<WriteFSqrtY, [JFPU1, JFPM], 42, [2, 42], 2>; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 9d1787fec6f..93de36b60e6 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -137,8 +137,14 @@ defm : SLMWriteResPair<WriteFCmpY, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>; defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>; -defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,34]>; -defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,34]>; +defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>; +defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>; +defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>; +defm : SLMWriteResPair<WriteFDivZ, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>; +defm : SLMWriteResPair<WriteFDiv64, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,32]>; +defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>; +defm : SLMWriteResPair<WriteFDiv64Y, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>; +defm : SLMWriteResPair<WriteFDiv64Z, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>; defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>; defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 5>; defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>; @@ -333,62 +339,4 @@ defm : SLMWriteResPair<WriteFMA, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFMAX, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFMAY, [SLM_FPC_RSV0], 1>; -// Instruction overrides - -def SLMriteResGroup1 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> { - let Latency = 69; - let NumMicroOps = 1; - let ResourceCycles = [1,69]; -} -def: InstRW<[SLMriteResGroup1], (instregex "(V?)DIVPDrr")>; - -def SLMriteResGroup2 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> { - let Latency = 39; - let NumMicroOps = 1; - let ResourceCycles = [1,39]; -} -def: InstRW<[SLMriteResGroup2], (instregex "(V?)DIVPSrr")>; - -def SLMriteResGroup3 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> { - let Latency = 34; - let NumMicroOps = 1; - let ResourceCycles = [1,32]; -} -def: InstRW<[SLMriteResGroup3], (instregex "(V?)DIVSDrr")>; - -def SLMriteResGroup4 : SchedWriteRes<[SLM_FPC_RSV0,SLMFPDivider]> { - let Latency = 19; - let NumMicroOps = 1; - let ResourceCycles = [1,17]; -} -def: InstRW<[SLMriteResGroup4], (instregex "(V?)DIVSSrr")>; - -def SLMriteResGroup5 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> { - let Latency = 72; - let NumMicroOps = 1; - let ResourceCycles = [1,1,69]; -} -def: InstRW<[SLMriteResGroup5], (instregex "(V?)DIVPDrm")>; - -def SLMriteResGroup6 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> { - let Latency = 42; - let NumMicroOps = 1; - let ResourceCycles = [1,1,39]; -} -def: InstRW<[SLMriteResGroup6], (instregex "(V?)DIVPSrm")>; - -def SLMriteResGroup7 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> { - let Latency = 37; - let NumMicroOps = 1; - let ResourceCycles = [1,1,32]; -} -def: InstRW<[SLMriteResGroup7], (instregex "(V?)DIVSDrm")>; - -def SLMriteResGroup8 : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV0,SLMFPDivider]> { - let Latency = 22; - let NumMicroOps = 1; - let ResourceCycles = [1,1,17]; -} -def: InstRW<[SLMriteResGroup8], (instregex "(V?)DIVSSrm")>; - } // SchedModel diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index f5a0e9c950b..5c3408b93c0 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -207,7 +207,13 @@ defm : ZnWriteResFpuPair<WriteCvtI2F, [ZnFPU3], 5>; defm : ZnWriteResFpuPair<WriteCvtF2F, [ZnFPU3], 5>; defm : ZnWriteResFpuPair<WriteCvtF2I, [ZnFPU3], 5>; defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>; -defm : ZnWriteResFpuPair<WriteFDivY, [ZnFPU3], 15>; +defm : ZnWriteResFpuPair<WriteFDivX, [ZnFPU3], 15>; +//defm : ZnWriteResFpuPair<WriteFDivY, [ZnFPU3], 15>; +defm : ZnWriteResFpuPair<WriteFDivZ, [ZnFPU3], 15>; +defm : ZnWriteResFpuPair<WriteFDiv64, [ZnFPU3], 15>; +defm : ZnWriteResFpuPair<WriteFDiv64X, [ZnFPU3], 15>; +//defm : ZnWriteResFpuPair<WriteFDiv64Y, [ZnFPU3], 15>; +defm : ZnWriteResFpuPair<WriteFDiv64Z, [ZnFPU3], 15>; defm : ZnWriteResFpuPair<WriteFSign, [ZnFPU3], 2>; defm : ZnWriteResFpuPair<WriteFRnd, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops? defm : ZnWriteResFpuPair<WriteFRndY, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops? @@ -1481,12 +1487,13 @@ def ZnWriteMULYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> { def : InstRW<[ZnWriteMULYLd], (instregex "(V?)MUL(P|S)(S|D)Yrm")>; // VDIVPS. +// TODO - convert to ZnWriteResFpuPair // y,y,y. def ZnWriteVDIVPSYr : SchedWriteRes<[ZnFPU3]> { let Latency = 12; let ResourceCycles = [12]; } -def : InstRW<[ZnWriteVDIVPSYr], (instregex "VDIVPSYrr")>; +def : SchedAlias<WriteFDivY, ZnWriteVDIVPSYr>; // y,y,m256. def ZnWriteVDIVPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { @@ -1494,15 +1501,16 @@ def ZnWriteVDIVPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { let NumMicroOps = 2; let ResourceCycles = [1, 19]; } -def : InstRW<[ZnWriteVDIVPSYLd], (instregex "VDIVPSYrm")>; +def : SchedAlias<WriteFDivYLd, ZnWriteVDIVPSYLd>; // VDIVPD. +// TODO - convert to ZnWriteResFpuPair // y,y,y. def ZnWriteVDIVPDY : SchedWriteRes<[ZnFPU3]> { let Latency = 15; let ResourceCycles = [15]; } -def : InstRW<[ZnWriteVDIVPDY], (instregex "VDIVPDYrr")>; +def : SchedAlias<WriteFDiv64Y, ZnWriteVDIVPDY>; // y,y,m256. def ZnWriteVDIVPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { @@ -1510,7 +1518,7 @@ def ZnWriteVDIVPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { let NumMicroOps = 2; let ResourceCycles = [1,22]; } -def : InstRW<[ZnWriteVDIVPDYLd], (instregex "VDIVPDYrm")>; +def : SchedAlias<WriteFDiv64YLd, ZnWriteVDIVPDYLd>; // VRCPPS. // TODO - convert to ZnWriteResFpuPair |