diff options
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 55 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 32 | ||||
-rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 17 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 17 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 15 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 15 | ||||
-rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 13 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleAtom.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 23 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 3 |
12 files changed, 93 insertions, 106 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index c39429ea3b2..02497ce72b3 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1932,37 +1932,45 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr, } multiclass blendmask_dq<bits<8> opc, string OpcodeStr, - X86FoldableSchedWrite sched, + X86FoldableSchedWrite sched128, + X86FoldableSchedWrite sched256, AVX512VLVectorVTInfo VTInfo> { - defm Z : WriteFVarBlendask <opc, OpcodeStr, sched, VTInfo.info512>, - WriteFVarBlendask_rmb <opc, OpcodeStr, sched, VTInfo.info512>, EVEX_V512; + defm Z : WriteFVarBlendask <opc, OpcodeStr, sched256, VTInfo.info512>, + WriteFVarBlendask_rmb <opc, OpcodeStr, sched256, VTInfo.info512>, EVEX_V512; let Predicates = [HasVLX] in { - defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched, VTInfo.info256>, - WriteFVarBlendask_rmb<opc, OpcodeStr, sched, VTInfo.info256>, EVEX_V256; - defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched, VTInfo.info128>, - WriteFVarBlendask_rmb<opc, OpcodeStr, sched, VTInfo.info128>, EVEX_V128; + defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched256, VTInfo.info256>, + WriteFVarBlendask_rmb<opc, OpcodeStr, sched256, VTInfo.info256>, EVEX_V256; + defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched128, VTInfo.info128>, + WriteFVarBlendask_rmb<opc, OpcodeStr, sched128, VTInfo.info128>, EVEX_V128; } } multiclass blendmask_bw<bits<8> opc, string OpcodeStr, - X86FoldableSchedWrite sched, + X86FoldableSchedWrite sched128, + X86FoldableSchedWrite sched256, AVX512VLVectorVTInfo VTInfo> { let Predicates = [HasBWI] in - defm Z : WriteFVarBlendask<opc, OpcodeStr, sched, VTInfo.info512>, EVEX_V512; + defm Z : WriteFVarBlendask<opc, OpcodeStr, sched256, VTInfo.info512>, EVEX_V512; let Predicates = [HasBWI, HasVLX] in { - defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched, VTInfo.info256>, EVEX_V256; - defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched, VTInfo.info128>, EVEX_V128; + defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched256, VTInfo.info256>, EVEX_V256; + defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched128, VTInfo.info128>, EVEX_V128; } } -defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", WriteFVarBlend, avx512vl_f32_info>; -defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", WriteFVarBlend, avx512vl_f64_info>, VEX_W; -defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", WriteVarBlend, avx512vl_i32_info>; -defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", WriteVarBlend, avx512vl_i64_info>, VEX_W; -defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", WriteVarBlend, avx512vl_i8_info>; -defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", WriteVarBlend, avx512vl_i16_info>, VEX_W; +defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", WriteFVarBlend, WriteFVarBlendY, + avx512vl_f32_info>; +defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", WriteFVarBlend, WriteFVarBlendY, + avx512vl_f64_info>, VEX_W; +defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", WriteVarBlend, WriteVarBlend, + avx512vl_i32_info>; +defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", WriteVarBlend, WriteVarBlend, + avx512vl_i64_info>, VEX_W; +defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", WriteVarBlend, WriteVarBlend, + avx512vl_i8_info>; +defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", WriteVarBlend, WriteVarBlend, + avx512vl_i16_info>, VEX_W; //===----------------------------------------------------------------------===// // Compare Instructions @@ -5967,23 +5975,26 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode, } multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar, - X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _, + X86FoldableSchedWrite sched128, + X86FoldableSchedWrite sched256, + AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl> { let Predicates = [HasAVX512] in { - defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched, + defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched256, _.info512, Ctrl.info512>, EVEX_V512; } let Predicates = [HasAVX512, HasVLX] in { - defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched, + defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched128, _.info128, Ctrl.info128>, EVEX_V128; - defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched, + defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched256, _.info256, Ctrl.info256>, EVEX_V256; } } multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar, AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ - defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, WriteFVarShuffle, _, Ctrl>; + defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, WriteFVarShuffle, + WriteFVarShuffleY, _, Ctrl>; defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr, X86VPermilpi, WriteFShuffle, _>, EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 6f6afe74cdb..8f88b9b333b 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -6049,7 +6049,7 @@ let Predicates = [HasAVX] in { VEX_4V, VEX_WIG; defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32, VR256, loadv8f32, f256mem, 0, SSEPackedSingle, - WriteFBlend, BlendCommuteImm8>, + WriteFBlendY, BlendCommuteImm8>, VEX_4V, VEX_L, VEX_WIG; defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64, VR128, loadv2f64, f128mem, 0, SSEPackedDouble, @@ -6057,7 +6057,7 @@ let Predicates = [HasAVX] in { VEX_4V, VEX_WIG; defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64, VR256, loadv4f64, f256mem, 0, SSEPackedDouble, - WriteFBlend, BlendCommuteImm4>, + WriteFBlendY, BlendCommuteImm4>, VEX_4V, VEX_L, VEX_WIG; defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16, VR128, loadv2i64, i128mem, 0, SSEPackedInt, @@ -6130,7 +6130,7 @@ defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem, WriteFVarBlend>; defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem, loadv4f64, int_x86_avx_blendv_pd_256, - WriteFVarBlend>, VEX_L; + WriteFVarBlendY>, VEX_L; } // ExeDomain = SSEPackedDouble let ExeDomain = SSEPackedSingle in { defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem, @@ -6138,7 +6138,7 @@ defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem, WriteFVarBlend>; defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem, loadv8f32, int_x86_avx_blendv_ps_256, - WriteFVarBlend>, VEX_L; + WriteFVarBlendY>, VEX_L; } // ExeDomain = SSEPackedSingle defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem, loadv2i64, int_x86_sse41_pblendvb, @@ -7156,45 +7156,51 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop_f, X86MemOperand x86memop_i, PatFrag i_frag, - ValueType f_vt, ValueType i_vt> { + ValueType f_vt, ValueType i_vt, + X86FoldableSchedWrite sched, + X86FoldableSchedWrite varsched> { let Predicates = [HasAVX, NoVLX] in { def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V, - Sched<[WriteFVarShuffle]>; + Sched<[varsched]>; def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop_i:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt (bitconvert (i_frag addr:$src2))))))]>, VEX_4V, - Sched<[WriteFVarShuffleLd, ReadAfterLd]>; + Sched<[varsched.Folded, ReadAfterLd]>; def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX, - Sched<[WriteFShuffle]>; + Sched<[sched]>; def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst), (ins x86memop_f:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (f_vt (X86VPermilpi (load addr:$src1), (i8 imm:$src2))))]>, VEX, - Sched<[WriteFShuffleLd]>; + Sched<[sched.Folded]>; }// Predicates = [HasAVX, NoVLX] } let ExeDomain = SSEPackedSingle in { defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, - loadv2i64, v4f32, v4i32>; + loadv2i64, v4f32, v4i32, WriteFShuffle, + WriteFVarShuffle>; defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, - loadv4i64, v8f32, v8i32>, VEX_L; + loadv4i64, v8f32, v8i32, WriteFShuffle, + WriteFVarShuffleY>, VEX_L; } let ExeDomain = SSEPackedDouble in { defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, - loadv2i64, v2f64, v2i64>; + loadv2i64, v2f64, v2i64, WriteFShuffle, + WriteFVarShuffle>; defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, - loadv4i64, v4f64, v4i64>, VEX_L; + loadv4i64, v4f64, v4i64, WriteFShuffle, + WriteFVarShuffleY>, VEX_L; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 2c180f557cf..18a32a8e44e 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -169,9 +169,12 @@ defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fch defm : BWWriteResPair<WriteFLogic, [BWPort5], 1, [1], 1, 5>; // Floating point and/or/xor logicals. defm : BWWriteResPair<WriteFLogicY, [BWPort5], 1, [1], 1, 6>; // Floating point and/or/xor logicals (YMM/ZMM). defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1>; // Floating point vector shuffles. -defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1>; // Floating point vector variable shuffles. -defm : BWWriteResPair<WriteFBlend, [BWPort015], 1>; // Floating point vector blends. +defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1, [1], 1, 5>; // Floating point vector variable shuffles. +defm : BWWriteResPair<WriteFVarShuffleY, [BWPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles. +defm : BWWriteResPair<WriteFBlend, [BWPort015], 1, [1], 1, 5>; // Floating point vector blends. +defm : BWWriteResPair<WriteFBlendY, [BWPort015], 1, [1], 1, 6>; // Floating point vector blends. defm : BWWriteResPair<WriteFVarBlend, [BWPort5], 2, [2], 2, 5>; // Fp vector variable blends. +defm : BWWriteResPair<WriteFVarBlendY, [BWPort5], 2, [2], 2, 6>; // Fp vector variable blends. def : WriteRes<WriteCvtF2FSt, [BWPort1,BWPort4,BWPort237]> { let Latency = 4; @@ -1099,9 +1102,7 @@ def: InstRW<[BWWriteResGroup75], (instregex "VPACKSSDWYrm", "VPALIGNRYrmi", "VPBLENDWYrmi", "VPERMILPDYmi", - "VPERMILPDYrm", "VPERMILPSYmi", - "VPERMILPSYrm", "VPSHUFBYrm", "VPSHUFDYmi", "VPSHUFHWYmi", @@ -1175,9 +1176,7 @@ def BWWriteResGroup77 : SchedWriteRes<[BWPort23,BWPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup77], (instregex "VBLENDPDYrmi", - "VBLENDPSYrmi", - "VPANDNYrm", +def: InstRW<[BWWriteResGroup77], (instregex "VPANDNYrm", "VPANDYrm", "VPBLENDDYrmi", "VPORYrm", @@ -1334,9 +1333,7 @@ def BWWriteResGroup94 : SchedWriteRes<[BWPort5,BWPort23]> { let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[BWWriteResGroup94], (instregex "VBLENDVPDYrm", - "VBLENDVPSYrm", - "VMASKMOVPDYrm", +def: InstRW<[BWWriteResGroup94], (instregex "VMASKMOVPDYrm", "VMASKMOVPSYrm", "VPBLENDVBYrm", "VPMASKMOVDYrm", diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index e2c18531689..59d20637a4f 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -166,11 +166,14 @@ defm : HWWriteResPair<WriteFSign, [HWPort0], 1>; defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>; defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>; defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1>; -defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1>; +defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1, [1], 1, 6>; +defm : HWWriteResPair<WriteFVarShuffleY, [HWPort5], 1, [1], 1, 7>; defm : HWWriteResPair<WriteFBlend, [HWPort015], 1, [1], 1, 6>; +defm : HWWriteResPair<WriteFBlendY, [HWPort015], 1, [1], 1, 7>; defm : HWWriteResPair<WriteFShuffle256, [HWPort5], 3>; defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3>; defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2], 2, 6>; +defm : HWWriteResPair<WriteFVarBlendY, [HWPort5], 2, [2], 2, 7>; def : WriteRes<WriteCvtF2FSt, [HWPort1,HWPort4,HWPort5,HWPort237]> { let Latency = 5; @@ -885,9 +888,7 @@ def: InstRW<[HWWriteResGroup13], (instregex "PUNPCKLWDrm", "(V?)PACKUSWBrm", "(V?)PALIGNRrmi", "VPERMILPDmi", - "VPERMILPDrm", "VPERMILPSmi", - "VPERMILPSrm", "(V?)PSHUFBrm", "(V?)PSHUFDmi", "(V?)PSHUFHWmi", @@ -919,9 +920,7 @@ def: InstRW<[HWWriteResGroup13_1], (instregex "VPACKSSDWYrm", "VPALIGNRYrmi", "VPBLENDWYrmi", "VPERMILPDYmi", - "VPERMILPDYrm", "VPERMILPSYmi", - "VPERMILPSYrm", "VPMOVSXBDYrm", "VPMOVSXBQYrm", "VPMOVSXWQYrm", @@ -1092,9 +1091,7 @@ def HWWriteResGroup17_2 : SchedWriteRes<[HWPort23,HWPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup17_2], (instregex "VBLENDPDYrmi", - "VBLENDPSYrmi", - "VPANDNYrm", +def: InstRW<[HWWriteResGroup17_2], (instregex "VPANDNYrm", "VPANDYrm", "VPBLENDDYrmi", "VPORYrm", @@ -1272,9 +1269,7 @@ def HWWriteResGroup36_1 : SchedWriteRes<[HWPort5,HWPort23]> { let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[HWWriteResGroup36_1], (instregex "VBLENDVPDYrm", - "VBLENDVPSYrm", - "VMASKMOVPDYrm", +def: InstRW<[HWWriteResGroup36_1], (instregex "VMASKMOVPDYrm", "VMASKMOVPSYrm", "VPBLENDVBYrm", "VPMASKMOVDYrm", diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index f59bd57cd5c..abd4d7200d6 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -154,8 +154,11 @@ defm : SBWriteResPair<WriteFLogic, [SBPort5], 1, [1], 1, 6>; defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>; defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1>; defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1>; +defm : SBWriteResPair<WriteFVarShuffleY,[SBPort5], 1>; defm : SBWriteResPair<WriteFBlend, [SBPort05], 1, [1], 1, 6>; +defm : SBWriteResPair<WriteFBlendY, [SBPort05], 1, [1], 1, 7>; defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>; +defm : SBWriteResPair<WriteFVarBlendY,[SBPort05], 2, [2], 2, 7>; def : WriteRes<WriteCvtF2FSt, [SBPort1, SBPort23, SBPort4]> { let Latency = 4; } // Vector integer operations. @@ -1156,14 +1159,6 @@ def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm", "VUNPCKLPDYrm", "VUNPCKLPSYrm")>; -def SBWriteResGroup74 : SchedWriteRes<[SBPort23,SBPort05]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup74], (instregex "VBLENDPDYrmi", - "VBLENDPSYrmi")>; - def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort05]> { let Latency = 8; let NumMicroOps = 3; @@ -1335,9 +1330,7 @@ def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort05]> { let NumMicroOps = 3; let ResourceCycles = [1,2]; } -def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPDYrm", - "VBLENDVPSYrm", - "VMASKMOVPDYrm", +def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDYrm", "VMASKMOVPSYrm")>; def SBWriteResGroup92 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index ee4e4ddb0cc..e1a8e405383 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -167,8 +167,11 @@ defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating po defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM). defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1>; // Floating point vector shuffles. defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1>; // Floating point vector shuffles. +defm : SKLWriteResPair<WriteFVarShuffleY, [SKLPort5], 1>; // Floating point vector shuffles. defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1, [1], 1, 6>; // Floating point vector blends. +defm : SKLWriteResPair<WriteFBlendY, [SKLPort015], 1, [1], 1, 7>; // Floating point vector blends. defm : SKLWriteResPair<WriteFVarBlend, [SKLPort015], 2, [2], 2, 6>; // Fp vector variable blends. +defm : SKLWriteResPair<WriteFVarBlendY,[SKLPort015], 2, [2], 2, 7>; // Fp vector variable blends. def : WriteRes<WriteCvtF2FSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01]> { let Latency = 6; @@ -1626,9 +1629,7 @@ def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup110], (instregex "VBLENDPDYrmi", - "VBLENDPSYrmi", - "VMASKMOVPDYrm", +def: InstRW<[SKLWriteResGroup110], (instregex "VMASKMOVPDYrm", "VMASKMOVPSYrm", "VPADDBYrm", "VPADDDYrm", @@ -1768,14 +1769,6 @@ def SKLWriteResGroup124 : SchedWriteRes<[SKLPort5,SKLPort01]> { } def: InstRW<[SKLWriteResGroup124], (instregex "(V?)DPPDrri")>; -def SKLWriteResGroup125 : SchedWriteRes<[SKLPort23,SKLPort015]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SKLWriteResGroup125], (instregex "VBLENDVPDYrm", - "VBLENDVPSYrm")>; - def SKLWriteResGroup126 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { let Latency = 9; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 085f44084aa..e5d4b1fc6ef 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -167,8 +167,11 @@ defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating poi defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM). defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1>; // Floating point vector shuffles. defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1>; // Floating point vector variable shuffles. +defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1>; // Floating point vector variable shuffles. defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends. +defm : SKXWriteResPair<WriteFBlendY,[SKXPort015], 1, [1], 1, 7>; // Floating point vector blends. defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends. +defm : SKXWriteResPair<WriteFVarBlendY,[SKXPort015], 2, [2], 2, 7>; // Fp vector variable blends. def : WriteRes<WriteCvtF2FSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015]> { let Latency = 6; @@ -3171,8 +3174,6 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)", "VBLENDMPDZrm(b?)", "VBLENDMPSZ256rm(b?)", "VBLENDMPSZrm(b?)", - "VBLENDPDYrmi", - "VBLENDPSYrmi", "VBROADCASTF32X2Z256m(b?)", "VBROADCASTF32X2Zm(b?)", "VBROADCASTF32X4Z256rm(b?)", @@ -3530,14 +3531,6 @@ def SKXWriteResGroup139 : SchedWriteRes<[SKXPort5,SKXPort015]> { } def: InstRW<[SKXWriteResGroup139], (instregex "(V?)DPPDrri")>; -def SKXWriteResGroup140 : SchedWriteRes<[SKXPort23,SKXPort015]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SKXWriteResGroup140], (instregex "VBLENDVPDYrm", - "VBLENDVPSYrm")>; - def SKXWriteResGroup141 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { let Latency = 9; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 20241bf4cc8..ac2c56e3bd9 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -94,8 +94,11 @@ defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals. defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM). defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles. defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles. +defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM/ZMM). defm WriteFBlend : X86SchedWritePair; // Floating point vector blends. +defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM/ZMM). defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends. +defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM/ZMM). // FMA Scheduling helper class. class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 51d046f9c5d..f706bc2bd70 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -215,11 +215,14 @@ defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>; defm : AtomWriteResPair<WriteFLogicY, [AtomPort01], [AtomPort0]>; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>; defm : AtomWriteResPair<WriteFVarShuffle, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair<WriteFVarShuffleY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair<WriteFMA, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair<WriteFMAS, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair<WriteFMAY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair<WriteFBlend, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair<WriteFBlendY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair<WriteFVarBlend, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair<WriteFVarBlendY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair<WriteFShuffle256, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair<WriteFVarShuffle256, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 09cb5305853..ffa8e091ec2 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -331,8 +331,11 @@ defm : JWriteResFpuPair<WriteFLogic, [JFPU01, JFPX], 1>; defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>; defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>; defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>; +defm : JWriteResYMMPair<WriteFVarShuffleY,[JFPU01, JFPX], 3, [2, 6], 6>; defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>; +defm : JWriteResYMMPair<WriteFBlendY, [JFPU01, JFPX], 1, [2, 2], 2>; defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [1, 4], 3>; +defm : JWriteResYMMPair<WriteFVarBlendY, [JFPU01, JFPX], 3, [2, 6], 6>; defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1>; defm : JWriteResFpuPair<WriteFVarShuffle256, [JFPU01, JFPX], 1>; // NOTE: Doesn't exist on Jaguar. @@ -681,26 +684,11 @@ def JWriteVCVTPDYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC, JFPX]> { } def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>; -def JWriteVPERMY: SchedWriteRes<[JFPU01, JFPX]> { - let Latency = 3; - let ResourceCycles = [2, 6]; - let NumMicroOps = 6; -} -def : InstRW<[JWriteVPERMY], (instrs VBLENDVPDYrr, VBLENDVPSYrr, VPERMILPDYrr, VPERMILPSYrr)>; - -def JWriteVPERMYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { - let Latency = 8; - let ResourceCycles = [2, 2, 6]; - let NumMicroOps = 6; -} -def : InstRW<[JWriteVPERMYLd, ReadAfterLd], (instrs VBLENDVPDYrm, VBLENDVPSYrm, VPERMILPDYrm, VPERMILPSYrm)>; - def JWriteShuffleY: SchedWriteRes<[JFPU01, JFPX]> { let ResourceCycles = [2, 2]; let NumMicroOps = 2; } -def : InstRW<[JWriteShuffleY], (instrs VBLENDPDYrri, VBLENDPSYrri, - VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr, +def : InstRW<[JWriteShuffleY], (instrs VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr, VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri, VSHUFPSYrri, VUNPCKHPDYrr, VUNPCKHPSYrr, VUNPCKLPDYrr, VUNPCKLPSYrr)>; @@ -710,8 +698,7 @@ def JWriteShuffleYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { let ResourceCycles = [2, 2, 2]; let NumMicroOps = 2; } -def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VBLENDPDYrmi, VBLENDPSYrmi, - VMOVDDUPYrm, VMOVSHDUPYrm, +def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VMOVDDUPYrm, VMOVSHDUPYrm, VMOVSLDUPYrm, VPERMILPDYmi, VPERMILPSYmi, VSHUFPDYrmi, VSHUFPSYrmi, VUNPCKHPDYrm, diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index c68213055b9..6d2b7d1f22b 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -145,6 +145,7 @@ defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>; +defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>; def : WriteRes<WriteCvtF2FSt, [SLM_FPC_RSV01, SLM_MEC_RSV]>; @@ -278,8 +279,10 @@ def : WriteRes<WriteNop, []>; // AVX/FMA is not supported on that architecture, but we should define the basic // scheduling resources anyway. def : WriteRes<WriteIMulH, [SLM_FPC_RSV0]>; +defm : SLMWriteResPair<WriteFBlendY, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 1>; +defm : SLMWriteResPair<WriteFVarBlendY, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFShuffle256, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFVarShuffle256, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteShuffle256, [SLM_FPC_RSV0], 1>; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index f5fdf142848..4076240cb5b 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -197,7 +197,9 @@ defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>; +defm : ZnWriteResFpuPair<WriteFBlendY, [ZnFPU01], 1>; defm : ZnWriteResFpuPair<WriteFVarBlend, [ZnFPU01], 1>; +defm : ZnWriteResFpuPair<WriteFVarBlendY,[ZnFPU01], 1>; defm : ZnWriteResFpuPair<WriteVarBlend, [ZnFPU0], 1>; defm : ZnWriteResFpuPair<WriteCvtI2F, [ZnFPU3], 5>; defm : ZnWriteResFpuPair<WriteCvtF2F, [ZnFPU3], 5>; @@ -208,6 +210,7 @@ defm : ZnWriteResFpuPair<WriteFLogic, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>; defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>; +defm : ZnWriteResFpuPair<WriteFVarShuffleY,[ZnFPU12], 1>; defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU0], 5>; defm : ZnWriteResFpuPair<WriteFMA, [ZnFPU03], 5>; defm : ZnWriteResFpuPair<WriteFMAS, [ZnFPU03], 5>; |