diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-04-17 07:22:44 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-04-17 07:22:44 +0000 |
| commit | 86e3c2692403e31299cece3a6445575b775b1a2d (patch) | |
| tree | 3b1dc6415d4fe97ae0e134497d63bab34c02a60b /llvm/lib | |
| parent | 5b4a67af1b09295fcb9080bc68d92f30374331fb (diff) | |
| download | bcm5719-llvm-86e3c2692403e31299cece3a6445575b775b1a2d.tar.gz bcm5719-llvm-86e3c2692403e31299cece3a6445575b775b1a2d.zip | |
[X86] Add FP comparison scheduler classes
Split the VCMP/VMAX/VMIN instructions off to WriteFCmp, and the VCOMIS instructions off to WriteFCom, instead of assuming they match WriteFAdd.
Differential Revision: https://reviews.llvm.org/D45656
llvm-svn: 330179
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 56 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFPStack.td | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 80 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 40 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 32 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 26 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 38 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 104 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleAtom.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 24 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 2 |
13 files changed, 105 insertions, 315 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index c5a76da3a5b..bd718ad19e5 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2051,10 +2051,10 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd, let Predicates = [HasAVX512] in { let ExeDomain = SSEPackedSingle in defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd, - WriteFAdd>, AVX512XSIi8Base; + WriteFCmp>, AVX512XSIi8Base; let ExeDomain = SSEPackedDouble in defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd, - WriteFAdd>, AVX512XDIi8Base, VEX_W; + WriteFCmp>, AVX512XDIi8Base, VEX_W; } multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode, @@ -2511,9 +2511,9 @@ multiclass avx512_vcmp<X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { } } -defm VCMPPD : avx512_vcmp<WriteFAdd, avx512vl_f64_info>, +defm VCMPPD : avx512_vcmp<WriteFCmp, avx512vl_f64_info>, AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; -defm VCMPPS : avx512_vcmp<WriteFAdd, avx512vl_f32_info>, +defm VCMPPS : avx512_vcmp<WriteFCmp, avx512vl_f32_info>, AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; @@ -4906,9 +4906,9 @@ defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, WriteFMul, 1>; defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, WriteFAdd, 0>; defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, WriteFDiv, 0>; defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds, - WriteFAdd, 0>; + WriteFCmp, 0>; defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds, - WriteFAdd, 0>; + WriteFCmp, 0>; // MIN/MAX nodes are commutable under "unsafe-fp-math". 
In this case we use // X86fminc and X86fmaxc instead of X86fmin and X86fmax @@ -4932,19 +4932,19 @@ multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr, } } defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc, - WriteFAdd>, XS, EVEX_4V, VEX_LIG, + WriteFCmp>, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc, - WriteFAdd>, XD, VEX_W, EVEX_4V, VEX_LIG, + WriteFCmp>, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc, - WriteFAdd>, XS, EVEX_4V, VEX_LIG, + WriteFCmp>, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, - WriteFAdd>, XD, VEX_W, EVEX_4V, VEX_LIG, + WriteFCmp>, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, @@ -5050,13 +5050,13 @@ defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, WriteFAdd>, avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, WriteFAdd>; defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, WriteFDiv>, avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, WriteFDiv>; -defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, WriteFAdd, 0>, - avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, WriteFAdd>; -defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, WriteFAdd, 0>, - avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, WriteFAdd>; +defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, WriteFCmp, 0>, + avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, WriteFCmp>; +defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, WriteFCmp, 0>, + avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, WriteFCmp>; let isCodeGenOnly = 1 in { - defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, WriteFAdd, 1>; - defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", 
X86fmaxc, HasAVX512, WriteFAdd, 1>; + defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, WriteFCmp, 1>; + defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, WriteFCmp, 1>; } defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, WriteFAdd, 1>; defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, WriteFAdd, 0>; @@ -7732,44 +7732,44 @@ multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, } let Defs = [EFLAGS], Predicates = [HasAVX512] in { - defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFAdd>, + defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>, AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; - defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFAdd>, + defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>, AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; - defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFAdd>, + defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>, AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; - defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFAdd>, + defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>, AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; } let Defs = [EFLAGS], Predicates = [HasAVX512] in { defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, - "ucomiss", WriteFAdd>, PS, EVEX, VEX_LIG, + "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, - "ucomisd", WriteFAdd>, PD, EVEX, + "ucomisd", WriteFCom>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; let Pattern = []<dag> in { defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32, - "comiss", WriteFAdd>, PS, EVEX, VEX_LIG, + "comiss", WriteFCom>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, 
loadf64, - "comisd", WriteFAdd>, PD, EVEX, + "comisd", WriteFCom>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } let isCodeGenOnly = 1 in { defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss", WriteFAdd>, PS, EVEX, VEX_LIG, + sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd", WriteFAdd>, PD, EVEX, + sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem, - sse_load_f32, "comiss", WriteFAdd>, PS, EVEX, VEX_LIG, + sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem, - sse_load_f64, "comisd", WriteFAdd>, PD, EVEX, + sse_load_f64, "comisd", WriteFCom>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } } diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td index 23f986d2dee..19a5b406158 100644 --- a/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/llvm/lib/Target/X86/X86InstrFPStack.td @@ -277,6 +277,8 @@ def SUB_FPrST0 : FPrST0PInst<MRM5r, "fsub{r}p\t$op">; def SUB_FST0r : FPST0rInst <MRM4r, "fsub\t$op">; def SUBR_FrST0 : FPrST0Inst <MRM4r, "fsub{|r}\t{%st(0), $op|$op, st(0)}">; def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t$op">; +} // SchedRW +let SchedRW = [WriteFCom] in { def COM_FST0r : FPST0rInst <MRM2r, "fcom\t$op">; def COMP_FST0r : FPST0rInst <MRM3r, "fcomp\t$op">; } // SchedRW @@ -320,7 +322,7 @@ defm SIN : FPUnary<fsin, MRM_FE, "fsin">; defm COS : FPUnary<fcos, MRM_FF, "fcos">; } -let SchedRW = [WriteFAdd] in { +let SchedRW = [WriteFCom] in { let hasSideEffects = 0 in { def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>; def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>; @@ -333,7 +335,7 @@ def TST_F : FPI<0xD9, MRM_E4, (outs), 
(ins), "ftst">; // Versions of FP instructions that take a single memory operand. Added for the // disassembler; remove as they are included with patterns elsewhere. -let SchedRW = [WriteFAddLd] in { +let SchedRW = [WriteFComLd] in { def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">; def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">; @@ -568,7 +570,7 @@ def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">; } // Floating point compares. -let SchedRW = [WriteFAdd] in { +let SchedRW = [WriteFCom] in { def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>; def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, @@ -578,7 +580,7 @@ def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, } // SchedRW } // Defs = [FPSW] -let SchedRW = [WriteFAdd] in { +let SchedRW = [WriteFCom] in { // CC = ST(0) cmp ST(i) let Defs = [EFLAGS, FPSW] in { def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 558903e9308..5b4f29c8059 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1854,23 +1854,23 @@ let ExeDomain = SSEPackedSingle in defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmps, f32, loadf32, "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", - WriteFAdd>, XS, VEX_4V, VEX_LIG, VEX_WIG; + WriteFCmp>, XS, VEX_4V, VEX_LIG, VEX_WIG; let ExeDomain = SSEPackedDouble in defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmps, f64, loadf64, "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", - WriteFAdd>, // same latency as 32 bit compare + WriteFCmp>, // same latency as 32 bit compare XD, VEX_4V, VEX_LIG, VEX_WIG; let Constraints = "$src1 = $dst" 
in { let ExeDomain = SSEPackedSingle in defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmps, f32, loadf32, "cmp${cc}ss\t{$src2, $dst|$dst, $src2}", - "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFAdd>, XS; + "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFCmp>, XS; let ExeDomain = SSEPackedDouble in defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmps, f64, loadf64, "cmp${cc}sd\t{$src2, $dst|$dst, $src2}", - "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFAdd>, XD; + "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}", WriteFCmp>, XD; } multiclass sse12_cmp_scalar_int<Operand memop, Operand CC, @@ -1894,21 +1894,21 @@ let isCodeGenOnly = 1 in { let ExeDomain = SSEPackedSingle in defm VCMPSS : sse12_cmp_scalar_int<ssmem, AVXCC, int_x86_sse_cmp_ss, "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}", - WriteFAdd, sse_load_f32>, XS, VEX_4V; + WriteFCmp, sse_load_f32>, XS, VEX_4V; let ExeDomain = SSEPackedDouble in defm VCMPSD : sse12_cmp_scalar_int<sdmem, AVXCC, int_x86_sse2_cmp_sd, "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}", - WriteFAdd, sse_load_f64>, // same latency as f32 + WriteFCmp, sse_load_f64>, // same latency as f32 XD, VEX_4V; let Constraints = "$src1 = $dst" in { let ExeDomain = SSEPackedSingle in defm CMPSS : sse12_cmp_scalar_int<ssmem, SSECC, int_x86_sse_cmp_ss, "cmp${cc}ss\t{$src, $dst|$dst, $src}", - WriteFAdd, sse_load_f32>, XS; + WriteFCmp, sse_load_f32>, XS; let ExeDomain = SSEPackedDouble in defm CMPSD : sse12_cmp_scalar_int<sdmem, SSECC, int_x86_sse2_cmp_sd, "cmp${cc}sd\t{$src, $dst|$dst, $src}", - WriteFAdd, sse_load_f64>, XD; + WriteFCmp, sse_load_f64>, XD; } } @@ -1951,49 +1951,49 @@ let mayLoad = 1 in let Defs = [EFLAGS] in { defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, - "ucomiss", WriteFAdd>, PS, VEX, VEX_LIG, VEX_WIG; + "ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd", WriteFAdd>, PD, VEX, VEX_LIG, 
VEX_WIG; + "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; let Pattern = []<dag> in { defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, - "comiss", WriteFAdd>, PS, VEX, VEX_LIG, VEX_WIG; + "comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, - "comisd", WriteFAdd>, PD, VEX, VEX_LIG, VEX_WIG; + "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; } let isCodeGenOnly = 1 in { defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss", WriteFAdd>, PS, VEX, VEX_WIG; + sse_load_f32, "ucomiss", WriteFCom>, PS, VEX, VEX_WIG; defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd", WriteFAdd>, PD, VEX, VEX_WIG; + sse_load_f64, "ucomisd", WriteFCom>, PD, VEX, VEX_WIG; defm VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, - sse_load_f32, "comiss", WriteFAdd>, PS, VEX, VEX_WIG; + sse_load_f32, "comiss", WriteFCom>, PS, VEX, VEX_WIG; defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, - sse_load_f64, "comisd", WriteFAdd>, PD, VEX, VEX_WIG; + sse_load_f64, "comisd", WriteFCom>, PD, VEX, VEX_WIG; } defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, - "ucomiss", WriteFAdd>, PS; + "ucomiss", WriteFCom>, PS; defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd", WriteFAdd>, PD; + "ucomisd", WriteFCom>, PD; let Pattern = []<dag> in { defm COMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, - "comiss", WriteFAdd>, PS; + "comiss", WriteFCom>, PS; defm COMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, - "comisd", WriteFAdd>, PD; + "comisd", WriteFCom>, PD; } let isCodeGenOnly = 1 in { defm UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss", WriteFAdd>, PS; + sse_load_f32, "ucomiss", WriteFCom>, PS; defm UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, - 
sse_load_f64, "ucomisd", WriteFAdd>, PD; + sse_load_f64, "ucomisd", WriteFCom>, PD; defm COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, - sse_load_f32, "comiss", WriteFAdd>, PS; + sse_load_f32, "comiss", WriteFCom>, PS; defm COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, - sse_load_f64, "comisd", WriteFAdd>, PD; + sse_load_f64, "comisd", WriteFCom>, PD; } } // Defs = [EFLAGS] @@ -2028,28 +2028,28 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, v4f32, "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}", "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", - WriteFAdd, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG; + WriteFCmp, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG; defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, v2f64, "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}", "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", - WriteFAdd, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG; + WriteFCmp, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG; defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, v8f32, "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}", "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", - WriteFAdd, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L; + WriteFCmp, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L; defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, v4f64, "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}", "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", - WriteFAdd, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L; + WriteFCmp, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L; let Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, v4f32, "cmp${cc}ps\t{$src2, $dst|$dst, $src2}", "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}", - WriteFAdd, SSEPackedSingle, memopv4f32>, PS; + WriteFCmp, SSEPackedSingle, memopv4f32>, PS; defm CMPPD : 
sse12_cmp_packed<VR128, f128mem, SSECC, v2f64, "cmp${cc}pd\t{$src2, $dst|$dst, $src2}", "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}", - WriteFAdd, SSEPackedDouble, memopv2f64>, PD; + WriteFCmp, SSEPackedDouble, memopv2f64>, PD; } def CommutableCMPCC : PatLeaf<(imm), [{ @@ -2583,19 +2583,19 @@ let isCommutable = 0 in { defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, WriteFDiv>, basic_sse12_fp_binop_s<0x5E, "div", fdiv, WriteFDiv>, basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, WriteFDiv>; - defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, WriteFAdd>, - basic_sse12_fp_binop_s<0x5F, "max", X86fmax, WriteFAdd>, - basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, WriteFAdd>; - defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, WriteFAdd>, - basic_sse12_fp_binop_s<0x5D, "min", X86fmin, WriteFAdd>, - basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, WriteFAdd>; + defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, WriteFCmp>, + basic_sse12_fp_binop_s<0x5F, "max", X86fmax, WriteFCmp>, + basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, WriteFCmp>; + defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, WriteFCmp>, + basic_sse12_fp_binop_s<0x5D, "min", X86fmin, WriteFCmp>, + basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, WriteFCmp>; } let isCodeGenOnly = 1 in { - defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, WriteFAdd>, - basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, WriteFAdd>; - defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, WriteFAdd>, - basic_sse12_fp_binop_s<0x5D, "min", X86fminc, WriteFAdd>; + defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, WriteFCmp>, + basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, WriteFCmp>; + defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, WriteFCmp>, + basic_sse12_fp_binop_s<0x5D, "min", X86fminc, WriteFCmp>; } // Patterns used to select SSE scalar fp arithmetic instructions from diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td 
index 79a25959262..6c1f6fc8c13 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -154,7 +154,9 @@ def : WriteRes<WriteFLoad, [BWPort23]> { let Latency = 5; } def : WriteRes<WriteFStore, [BWPort237, BWPort4]>; def : WriteRes<WriteFMove, [BWPort5]>; -defm : BWWriteResPair<WriteFAdd, [BWPort1], 3>; // Floating point add/sub/compare. +defm : BWWriteResPair<WriteFAdd, [BWPort1], 3>; // Floating point add/sub. +defm : BWWriteResPair<WriteFCmp, [BWPort1], 3>; // Floating point compare. +defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags. defm : BWWriteResPair<WriteFMul, [BWPort0], 5>; // Floating point multiplication. defm : BWWriteResPair<WriteFDiv, [BWPort0], 12>; // 10-14 cycles. // Floating point division. defm : BWWriteResPair<WriteFSqrt, [BWPort0], 15>; // Floating point square root. @@ -843,29 +845,13 @@ def: InstRW<[BWWriteResGroup27], (instregex "ADD_FPrST0", "(V?)ADDSSrr", "(V?)ADDSUBPD(Y?)rr", "(V?)ADDSUBPS(Y?)rr", - "(V?)CMPPD(Y?)rri", - "(V?)CMPPS(Y?)rri", - "(V?)CMPSDrr", - "(V?)CMPSSrr", - "(V?)COMISDrr", - "(V?)COMISSrr", "(V?)CVTDQ2PS(Y?)rr", "(V?)CVTPS2DQ(Y?)rr", "(V?)CVTTPS2DQ(Y?)rr", - "(V?)MAX(C?)PD(Y?)rr", - "(V?)MAX(C?)PS(Y?)rr", - "(V?)MAX(C?)SDrr", - "(V?)MAX(C?)SSrr", - "(V?)MIN(C?)PD(Y?)rr", - "(V?)MIN(C?)PS(Y?)rr", - "(V?)MIN(C?)SDrr", - "(V?)MIN(C?)SSrr", "(V?)SUBPD(Y?)rr", "(V?)SUBPS(Y?)rr", "(V?)SUBSDrr", - "(V?)SUBSSrr", - "(V?)UCOMISDrr", - "(V?)UCOMISSrr")>; + "(V?)SUBSSrr")>; def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> { let Latency = 3; @@ -1832,29 +1818,13 @@ def: InstRW<[BWWriteResGroup91], (instregex "MMX_CVTPI2PSirm", "(V?)ADDSSrm", "(V?)ADDSUBPDrm", "(V?)ADDSUBPSrm", - "(V?)CMPPDrmi", - "(V?)CMPPSrmi", - "(V?)CMPSDrm", - "(V?)CMPSSrm", - "(V?)COMISDrm", - "(V?)COMISSrm", "(V?)CVTDQ2PSrm", "(V?)CVTPS2DQrm", "(V?)CVTTPS2DQrm", - "(V?)MAX(C?)PDrm", - "(V?)MAX(C?)PSrm", - "(V?)MAX(C?)SDrm", - "(V?)MAX(C?)SSrm", - "(V?)MIN(C?)PDrm", 
- "(V?)MIN(C?)PSrm", - "(V?)MIN(C?)SDrm", - "(V?)MIN(C?)SSrm", "(V?)SUBPDrm", "(V?)SUBPSrm", "(V?)SUBSDrm", - "(V?)SUBSSrm", - "(V?)UCOMISDrm", - "(V?)UCOMISSrm")>; + "(V?)SUBSSrm")>; def BWWriteResGroup91_16 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> { let Latency = 8; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 8022ddad111..d25420c420d 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -149,6 +149,8 @@ def : WriteRes<WriteFLoad, [HWPort23]> { let Latency = 5; } def : WriteRes<WriteFMove, [HWPort5]>; defm : HWWriteResPair<WriteFAdd, [HWPort1], 3>; +defm : HWWriteResPair<WriteFCmp, [HWPort1], 3, [1], 1, 6>; +defm : HWWriteResPair<WriteFCom, [HWPort1], 3>; defm : HWWriteResPair<WriteFMul, [HWPort0], 5>; defm : HWWriteResPair<WriteFDiv, [HWPort0], 12>; // 10-14 cycles. defm : HWWriteResPair<WriteFRcp, [HWPort0], 5>; @@ -1041,16 +1043,12 @@ def: InstRW<[HWWriteResGroup12], (instregex "FCOM32m", "(V?)ADDSSrm", "(V?)CMPSDrm", "(V?)CMPSSrm", - "(V?)COMISDrm", - "(V?)COMISSrm", "(V?)MAX(C?)SDrm", "(V?)MAX(C?)SSrm", "(V?)MIN(C?)SDrm", "(V?)MIN(C?)SSrm", "(V?)SUBSDrm", - "(V?)SUBSSrm", - "(V?)UCOMISDrm", - "(V?)UCOMISSrm")>; + "(V?)SUBSSrm")>; def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> { let Latency = 7; @@ -1730,29 +1728,13 @@ def: InstRW<[HWWriteResGroup50], (instregex "ADD_FPrST0", "(V?)ADDSSrr", "(V?)ADDSUBPD(Y?)rr", "(V?)ADDSUBPS(Y?)rr", - "(V?)CMPPD(Y?)rri", - "(V?)CMPPS(Y?)rri", - "(V?)CMPSDrr", - "(V?)CMPSSrr", - "(V?)COMISDrr", - "(V?)COMISSrr", "(V?)CVTDQ2PS(Y?)rr", "(V?)CVTPS2DQ(Y?)rr", "(V?)CVTTPS2DQ(Y?)rr", - "(V?)MAX(C?)PD(Y?)rr", - "(V?)MAX(C?)PS(Y?)rr", - "(V?)MAX(C?)SDrr", - "(V?)MAX(C?)SSrr", - "(V?)MIN(C?)PD(Y?)rr", - "(V?)MIN(C?)PS(Y?)rr", - "(V?)MIN(C?)SDrr", - "(V?)MIN(C?)SSrr", "(V?)SUBPD(Y?)rr", "(V?)SUBPS(Y?)rr", "(V?)SUBSDrr", - "(V?)SUBSSrr", - "(V?)UCOMISDrr", - "(V?)UCOMISSrr")>; + "(V?)SUBSSrr")>; def 
HWWriteResGroup50_16i : SchedWriteRes<[HWPort1, HWPort0156]> { let Latency = 3; @@ -1804,15 +1786,9 @@ def: InstRW<[HWWriteResGroup52], (instregex "(V?)ADDPDrm", "(V?)ADDPSrm", "(V?)ADDSUBPDrm", "(V?)ADDSUBPSrm", - "(V?)CMPPDrmi", - "(V?)CMPPSrmi", "(V?)CVTDQ2PSrm", "(V?)CVTPS2DQrm", "(V?)CVTTPS2DQrm", - "(V?)MAX(C?)PDrm", - "(V?)MAX(C?)PSrm", - "(V?)MIN(C?)PDrm", - "(V?)MIN(C?)PSrm", "(V?)SUBPDrm", "(V?)SUBPSrm")>; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index aefbfb64cf8..c95771b4c2c 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -139,6 +139,8 @@ def : WriteRes<WriteFLoad, [SBPort23]> { let Latency = 6; } def : WriteRes<WriteFMove, [SBPort5]>; defm : SBWriteResPair<WriteFAdd, [SBPort1], 3>; +defm : SBWriteResPair<WriteFCmp, [SBPort1], 3, [1], 1, 6>; +defm : SBWriteResPair<WriteFCom, [SBPort1], 3>; defm : SBWriteResPair<WriteFMul, [SBPort0], 5>; defm : SBWriteResPair<WriteFDiv, [SBPort0], 24>; defm : SBWriteResPair<WriteFRcp, [SBPort0], 5>; @@ -685,21 +687,9 @@ def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0", "(V?)ADDSSrr", "(V?)ADDSUBPD(Y?)rr", "(V?)ADDSUBPS(Y?)rr", - "(V?)CMPPD(Y?)rri", - "(V?)CMPPS(Y?)rri", - "(V?)CMPSDrr", - "(V?)CMPSSrr", "(V?)CVTDQ2PS(Y?)rr", "(V?)CVTPS2DQ(Y?)rr", "(V?)CVTTPS2DQ(Y?)rr", - "(V?)MAX(C?)PD(Y?)rr", - "(V?)MAX(C?)PS(Y?)rr", - "(V?)MAX(C?)SDrr", - "(V?)MAX(C?)SSrr", - "(V?)MIN(C?)PD(Y?)rr", - "(V?)MIN(C?)PS(Y?)rr", - "(V?)MIN(C?)SDrr", - "(V?)MIN(C?)SSrr", "(V?)ROUNDPD(Y?)r", "(V?)ROUNDPS(Y?)r", "(V?)ROUNDSDr", @@ -1562,23 +1552,11 @@ def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPI2PSirm", "(V?)ADDSSrm", "(V?)ADDSUBPDrm", "(V?)ADDSUBPSrm", - "(V?)CMPPDrmi", - "(V?)CMPPSrmi", - "(V?)CMPSDrm", - "(V?)CMPSSrm", "(V?)CVTDQ2PSrm", "(V?)CVTPS2DQrm", "(V?)CVTSI642SDrm", "(V?)CVTSI2SDrm", "(V?)CVTTPS2DQrm", - "(V?)MAX(C?)PDrm", - "(V?)MAX(C?)PSrm", - "(V?)MAX(C?)SDrm", - "(V?)MAX(C?)SSrm", - 
"(V?)MIN(C?)PDrm", - "(V?)MIN(C?)PSrm", - "(V?)MIN(C?)SDrm", - "(V?)MIN(C?)SSrm", "(V?)ROUNDPDm", "(V?)ROUNDPSm", "(V?)ROUNDSDm", diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 6511206992d..b36f5797bb5 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -151,7 +151,9 @@ def : WriteRes<WriteFLoad, [SKLPort23]> { let Latency = 6; } def : WriteRes<WriteFStore, [SKLPort237, SKLPort4]>; def : WriteRes<WriteFMove, [SKLPort015]>; -defm : SKLWriteResPair<WriteFAdd, [SKLPort1], 3>; // Floating point add/sub/compare. +defm : SKLWriteResPair<WriteFAdd, [SKLPort1], 3>; // Floating point add/sub. +defm : SKLWriteResPair<WriteFCmp, [SKLPort01], 4, [1], 1, 6>; // Floating point compare. +defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags. defm : SKLWriteResPair<WriteFMul, [SKLPort0], 5>; // Floating point multiplication. defm : SKLWriteResPair<WriteFDiv, [SKLPort0], 12>; // 10-14 cycles. // Floating point division. defm : SKLWriteResPair<WriteFSqrt, [SKLPort0], 15>; // Floating point square root. 
@@ -672,14 +674,10 @@ def SKLWriteResGroup12 : SchedWriteRes<[SKLPort0]> { } def: InstRW<[SKLWriteResGroup12], (instregex "MMX_MOVD64from64rr", "MMX_MOVD64grr", - "(V?)COMISDrr", - "(V?)COMISSrr", "(V?)MOVPDI2DIrr", "(V?)MOVPQIto64rr", "VTESTPD(Y?)rr", - "VTESTPS(Y?)rr", - "(V?)UCOMISDrr", - "(V?)UCOMISSrr")>; + "VTESTPS(Y?)rr")>; def SKLWriteResGroup13 : SchedWriteRes<[SKLPort5]> { let Latency = 2; @@ -1067,21 +1065,9 @@ def: InstRW<[SKLWriteResGroup48], (instregex "(V?)ADDPD(Y?)rr", "(V?)ADDSSrr", "(V?)ADDSUBPD(Y?)rr", "(V?)ADDSUBPS(Y?)rr", - "(V?)CMPPD(Y?)rri", - "(V?)CMPPS(Y?)rri", - "(V?)CMPSDrr", - "(V?)CMPSSrr", "(V?)CVTDQ2PS(Y?)rr", "(V?)CVTPS2DQ(Y?)rr", "(V?)CVTTPS2DQ(Y?)rr", - "(V?)MAX(C?)PD(Y?)rr", - "(V?)MAX(C?)PS(Y?)rr", - "(V?)MAX(C?)SDrr", - "(V?)MAX(C?)SSrr", - "(V?)MIN(C?)PD(Y?)rr", - "(V?)MIN(C?)PS(Y?)rr", - "(V?)MIN(C?)SDrr", - "(V?)MIN(C?)SSrr", "(V?)MULPD(Y?)rr", "(V?)MULPS(Y?)rr", "(V?)MULSDrr", @@ -1547,16 +1533,6 @@ def SKLWriteResGroup86 : SchedWriteRes<[SKLPort0,SKLPort5]> { } def: InstRW<[SKLWriteResGroup86], (instregex "VCVTDQ2PDYrr")>; -def SKLWriteResGroup87 : SchedWriteRes<[SKLPort0,SKLPort23]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup87], (instregex "(V?)COMISDrm", - "(V?)COMISSrm", - "(V?)UCOMISDrm", - "(V?)UCOMISSrm")>; - def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> { let Latency = 7; let NumMicroOps = 2; @@ -2196,17 +2172,11 @@ def: InstRW<[SKLWriteResGroup134], (instregex "(V?)ADDPDrm", "(V?)ADDPSrm", "(V?)ADDSUBPDrm", "(V?)ADDSUBPSrm", - "(V?)CMPPDrmi", - "(V?)CMPPSrmi", "(V?)CVTDQ2PSrm", "(V?)CVTPH2PSYrm", "(V?)CVTPS2DQrm", "(V?)CVTSS2SDrm", "(V?)CVTTPS2DQrm", - "(V?)MAX(C?)PDrm", - "(V?)MAX(C?)PSrm", - "(V?)MIN(C?)PDrm", - "(V?)MIN(C?)PSrm", "(V?)MULPDrm", "(V?)MULPSrm", "(V?)PHMINPOSUWrm", diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 5a79f240273..283a3ed37e5 100755 --- 
a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -151,7 +151,9 @@ def : WriteRes<WriteFLoad, [SKXPort23]> { let Latency = 5; } def : WriteRes<WriteFStore, [SKXPort237, SKXPort4]>; def : WriteRes<WriteFMove, [SKXPort015]>; -defm : SKXWriteResPair<WriteFAdd, [SKXPort1], 3>; // Floating point add/sub/compare. +defm : SKXWriteResPair<WriteFAdd, [SKXPort1], 3>; // Floating point add/sub. +defm : SKXWriteResPair<WriteFCmp, [SKXPort015], 4, [1], 1, 6>; // Floating point compare. +defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags. defm : SKXWriteResPair<WriteFMul, [SKXPort0], 5>; // Floating point multiplication. defm : SKXWriteResPair<WriteFDiv, [SKXPort0], 12>; // 10-14 cycles. // Floating point division. defm : SKXWriteResPair<WriteFSqrt, [SKXPort0], 15>; // Floating point square root. @@ -1406,18 +1408,10 @@ def SKXWriteResGroup12 : SchedWriteRes<[SKXPort0]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKXWriteResGroup12], (instregex "COMISDrr", - "COMISSrr", - "MMX_MOVD64from64rr", +def: InstRW<[SKXWriteResGroup12], (instregex "MMX_MOVD64from64rr", "MMX_MOVD64grr", "MOVPDI2DIrr", "MOVPQIto64rr", - "UCOMISDrr", - "UCOMISSrr", - "VCOMISDZrr(b?)", - "VCOMISDrr", - "VCOMISSZrr(b?)", - "VCOMISSrr", "VMOVPDI2DIZrr", "VMOVPDI2DIrr", "VMOVPQIto64Zrr", @@ -1425,11 +1419,7 @@ def: InstRW<[SKXWriteResGroup12], (instregex "COMISDrr", "VTESTPDYrr", "VTESTPDrr", "VTESTPSYrr", - "VTESTPSrr", - "VUCOMISDZrr(b?)", - "VUCOMISDrr", - "VUCOMISSZrr(b?)", - "VUCOMISSrr")>; + "VTESTPSrr")>; def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> { let Latency = 2; @@ -2162,21 +2152,9 @@ def: InstRW<[SKXWriteResGroup50], (instregex "ADDPDrr", "ADDSSrr", "ADDSUBPDrr", "ADDSUBPSrr", - "CMPPDrri", - "CMPPSrri", - "CMPSDrr", - "CMPSSrr", "CVTDQ2PSrr", "CVTPS2DQrr", "CVTTPS2DQrr", - "MAX(C?)PDrr", - "MAX(C?)PSrr", - "MAX(C?)SDrr", - "MAX(C?)SSrr", - "MIN(C?)PDrr", - "MIN(C?)PSrr", - 
"MIN(C?)SDrr", - "MIN(C?)SSrr", "MULPDrr", "MULPSrr", "MULSDrr", @@ -2212,12 +2190,6 @@ def: InstRW<[SKXWriteResGroup50], (instregex "ADDPDrr", "VADDSUBPDrr", "VADDSUBPSYrr", "VADDSUBPSrr", - "VCMPPDYrri", - "VCMPPDrri", - "VCMPPSYrri", - "VCMPPSrri", - "VCMPSDrr", - "VCMPSSrr", "VCVTDQ2PSYrr", "VCVTDQ2PSZ128rr", "VCVTDQ2PSZ256rr", @@ -2284,34 +2256,6 @@ def: InstRW<[SKXWriteResGroup50], (instregex "ADDPDrr", "VGETMANTPSZrri", "VGETMANTSDZ128rri", "VGETMANTSSZ128rri", - "VMAX(C?)PDYrr", - "VMAX(C?)PDZ128rr", - "VMAX(C?)PDZ256rr", - "VMAX(C?)PDZrr", - "VMAX(C?)PDrr", - "VMAX(C?)PSYrr", - "VMAX(C?)PSZ128rr", - "VMAX(C?)PSZ256rr", - "VMAX(C?)PSZrr", - "VMAX(C?)PSrr", - "VMAX(C?)SDZrr", - "VMAX(C?)SDrr", - "VMAX(C?)SSZrr", - "VMAX(C?)SSrr", - "VMIN(C?)PDYrr", - "VMIN(C?)PDZ128rr", - "VMIN(C?)PDZ256rr", - "VMIN(C?)PDZrr", - "VMIN(C?)PDrr", - "VMIN(C?)PSYrr", - "VMIN(C?)PSZ128rr", - "VMIN(C?)PSZ256rr", - "VMIN(C?)PSZrr", - "VMIN(C?)PSrr", - "VMIN(C?)SDZrr", - "VMIN(C?)SDrr", - "VMIN(C?)SSZrr", - "VMIN(C?)SSrr", "VMULPDYrr", "VMULPDZ128rr", "VMULPDZ256rr", @@ -3145,24 +3089,6 @@ def SKXWriteResGroup90 : SchedWriteRes<[SKXPort0,SKXPort5]> { } def: InstRW<[SKXWriteResGroup90], (instregex "VCVTDQ2PDYrr")>; -def SKXWriteResGroup91 : SchedWriteRes<[SKXPort0,SKXPort23]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKXWriteResGroup91], (instregex "COMISDrm", - "COMISSrm", - "UCOMISDrm", - "UCOMISSrm", - "VCOMISDZrm(b?)", - "VCOMISDrm", - "VCOMISSZrm(b?)", - "VCOMISSrm", - "VUCOMISDZrm(b?)", - "VUCOMISDrm", - "VUCOMISSZrm(b?)", - "VUCOMISSrm")>; - def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> { let Latency = 7; let NumMicroOps = 2; @@ -4744,16 +4670,10 @@ def: InstRW<[SKXWriteResGroup149], (instregex "ADDPDrm", "ADDPSrm", "ADDSUBPDrm", "ADDSUBPSrm", - "CMPPDrmi", - "CMPPSrmi", "CVTDQ2PSrm", "CVTPS2DQrm", "CVTSS2SDrm", "CVTTPS2DQrm", - "MAX(C?)PDrm", - "MAX(C?)PSrm", - "MIN(C?)PDrm", - "MIN(C?)PSrm", "MULPDrm", 
"MULPSrm", "PHMINPOSUWrm", @@ -4775,8 +4695,6 @@ def: InstRW<[SKXWriteResGroup149], (instregex "ADDPDrm", "VADDSSZrm", "VADDSUBPDrm", "VADDSUBPSrm", - "VCMPPDrmi", - "VCMPPSrmi", "VCVTDQ2PDZ128rm(b?)", "VCVTDQ2PSZ128rm(b?)", "VCVTDQ2PSrm", @@ -4817,18 +4735,6 @@ def: InstRW<[SKXWriteResGroup149], (instregex "ADDPDrm", "VGETMANTPSZ128rm(b?)i", "VGETMANTSDZ128rmi(b?)", "VGETMANTSSZ128rmi(b?)", - "VMAX(C?)PDZ128rm(b?)", - "VMAX(C?)PDrm", - "VMAX(C?)PSZ128rm(b?)", - "VMAX(C?)PSrm", - "VMAX(C?)SDZrm", - "VMAX(C?)SSZrm", - "VMIN(C?)PDZ128rm(b?)", - "VMIN(C?)PDrm", - "VMIN(C?)PSZ128rm(b?)", - "VMIN(C?)PSrm", - "VMIN(C?)SDZrm", - "VMIN(C?)SSZrm", "VMULPDZ128rm(b?)", "VMULPDrm", "VMULPSZ128rm(b?)", diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index a3d715dbcb2..cb709e9f698 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -78,7 +78,9 @@ defm WriteJump : X86SchedWritePair; def WriteFLoad : SchedWrite; def WriteFStore : SchedWrite; def WriteFMove : SchedWrite; -defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare. +defm WriteFAdd : X86SchedWritePair; // Floating point add/sub. +defm WriteFCmp : X86SchedWritePair; // Floating point compare. +defm WriteFCom : X86SchedWritePair; // Floating point compare to flags. defm WriteFMul : X86SchedWritePair; // Floating point multiplication. defm WriteFDiv : X86SchedWritePair; // Floating point division. defm WriteFSqrt : X86SchedWritePair; // Floating point square root. 
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 8305c6c1e94..fa398d0e785 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -203,6 +203,8 @@ def : WriteRes<WriteFStore, [AtomPort0]>; def : WriteRes<WriteFMove, [AtomPort01]>; defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; +defm : AtomWriteResPair<WriteFCmp, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; +defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>; defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>; defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 2ab593af326..3e4c0a6d5e0 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -294,6 +294,8 @@ def : WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC]>; def : WriteRes<WriteFMove, [JFPU01, JFPX]>; defm : JWriteResFpuPair<WriteFAdd, [JFPU0, JFPA], 3>; +defm : JWriteResFpuPair<WriteFCmp, [JFPU0, JFPA], 2>; +defm : JWriteResFpuPair<WriteFCom, [JFPU0, JFPA, JALU0], 3>; defm : JWriteResFpuPair<WriteFMul, [JFPU1, JFPM], 2>; defm : JWriteResFpuPair<WriteFMA, [JFPU1, JFPM], 2>; // NOTE: Doesn't exist on Jaguar. 
defm : JWriteResFpuPair<WriteFRcp, [JFPU1, JFPM], 2>; @@ -704,28 +706,6 @@ def JWriteVMOVNTPYSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> { } def : InstRW<[JWriteVMOVNTPYSt], (instrs VMOVNTDQYmr, VMOVNTPDYmr, VMOVNTPSYmr)>; -def JWriteFComi : SchedWriteRes<[JFPU0, JFPA, JALU0]> { - let Latency = 3; -} -def : InstRW<[JWriteFComi], (instregex "(V)?(U)?COMIS(D|S)rr")>; - -def JWriteFComiLd : SchedWriteRes<[JLAGU, JFPU0, JFPA, JALU0]> { - let Latency = 8; -} -def : InstRW<[JWriteFComiLd], (instregex "(V)?(U)?COMIS(D|S)rm")>; - -def JWriteFCmp: SchedWriteRes<[JFPU0, JFPA]> { - let Latency = 2; -} -def : InstRW<[JWriteFCmp], (instregex "(V)?M(AX|IN)(P|S)(D|S)rr", - "(V)?CMPP(S|D)rri", "(V)?CMPS(S|D)rr")>; - -def JWriteFCmpLd: SchedWriteRes<[JLAGU, JFPU0, JFPA]> { - let Latency = 7; -} -def : InstRW<[JWriteFCmpLd], (instregex "(V)?M(AX|IN)(P|S)(D|S)rm", - "(V)?CMPP(S|D)rmi", "(V)?CMPS(S|D)rm")>; - def JWriteFCmpY: SchedWriteRes<[JFPU0, JFPA]> { let Latency = 2; let ResourceCycles = [2, 2]; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 8147c94a2e7..8e913b48a12 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -125,6 +125,8 @@ def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; } def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>; defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>; +defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>; +defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>; defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,34]>; defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 2b775b44316..dc9438c8dae 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -193,6 +193,8 @@ def : WriteRes<WriteFLoad, [ZnAGU]> { 
let Latency = 8; } defm : ZnWriteResFpuPair<WriteFHAdd, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>; +defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU0], 3>; +defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>; defm : ZnWriteResFpuPair<WriteFVarBlend, [ZnFPU01], 1>; defm : ZnWriteResFpuPair<WriteVarBlend, [ZnFPU0], 1>; |

