diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-05-03 17:56:43 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-05-03 17:56:43 +0000 |
| commit | f2d2cedab48f07d63b219836fcb7b653c9aeb27b (patch) | |
| tree | 0350ec5d24e889bf0d0ab8cae5a73aa450bd6609 /llvm/lib | |
| parent | e7532d294003fc2a835bf45ebb83e2285326e4c4 (diff) | |
| download | bcm5719-llvm-f2d2cedab48f07d63b219836fcb7b653c9aeb27b.tar.gz bcm5719-llvm-f2d2cedab48f07d63b219836fcb7b653c9aeb27b.zip | |
[X86] Split WriteVecShift/WriteVarVecShift into MMX, XMM and YMM/ZMM scheduler classes
This took a bit of extra work because, on Intel targets, the old (V)PSLLDrr/(V)PSLLDrm style instructions act differently from the shift-by-immediate forms. I ended up creating WriteVecShiftImm classes for XMM/YMM/ZMM vector shift by immediate, retaining WriteVecShift as the default (used only by MMX), and adding WriteVecShiftX/WriteVecShiftY. X86SchedWriteWidths hides most of this, thank goodness.
llvm-svn: 331472
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 16 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrMMX.td | 29 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 39 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrXOP.td | 12 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 76 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 88 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 47 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 84 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 320 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 29 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleAtom.td | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 7 |
14 files changed, 170 insertions, 597 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index cea1e57aafe..23bc1d83acf 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -5587,24 +5587,24 @@ multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq, } defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli, - SchedWriteVecShift>, + SchedWriteVecShiftImm>, avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli, - SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V; + SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli, - SchedWriteVecShift>, + SchedWriteVecShiftImm>, avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli, - SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V; + SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai, - SchedWriteVecShift>, + SchedWriteVecShiftImm>, avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai, - SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V; + SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri, - SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V; + SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli, - SchedWriteVecShift>, AVX512BIi8Base, EVEX_4V; + SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, SchedWriteVecShift>; diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td index c21c00afbfb..c410ddb4c60 100644 --- a/llvm/lib/Target/X86/X86InstrMMX.td +++ b/llvm/lib/Target/X86/X86InstrMMX.td @@ -52,7 +52,8 @@ let Constraints = "$src1 = $dst" in { multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, string OpcodeStr, Intrinsic IntId, - Intrinsic IntId2, 
X86FoldableSchedWrite sched> { + Intrinsic IntId2, X86FoldableSchedWrite sched, + X86FoldableSchedWrite schedImm> { def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), @@ -68,7 +69,7 @@ let Constraints = "$src1 = $dst" in { (ins VR64:$src1, i32u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId2 VR64:$src1, imm:$src2))]>, - Sched<[sched]>; + Sched<[schedImm]>; } } @@ -412,30 +413,38 @@ defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn, // Shift Instructions defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", int_x86_mmx_psrl_w, int_x86_mmx_psrli_w, - SchedWriteVecShift.MMX>; + SchedWriteVecShift.MMX, + SchedWriteVecShiftImm.MMX>; defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", int_x86_mmx_psrl_d, int_x86_mmx_psrli_d, - SchedWriteVecShift.MMX>; + SchedWriteVecShift.MMX, + SchedWriteVecShiftImm.MMX>; defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", int_x86_mmx_psrl_q, int_x86_mmx_psrli_q, - SchedWriteVecShift.MMX>; + SchedWriteVecShift.MMX, + SchedWriteVecShiftImm.MMX>; defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", int_x86_mmx_psll_w, int_x86_mmx_pslli_w, - SchedWriteVecShift.MMX>; + SchedWriteVecShift.MMX, + SchedWriteVecShiftImm.MMX>; defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", int_x86_mmx_psll_d, int_x86_mmx_pslli_d, - SchedWriteVecShift.MMX>; + SchedWriteVecShift.MMX, + SchedWriteVecShiftImm.MMX>; defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", int_x86_mmx_psll_q, int_x86_mmx_pslli_q, - SchedWriteVecShift.MMX>; + SchedWriteVecShift.MMX, + SchedWriteVecShiftImm.MMX>; defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", int_x86_mmx_psra_w, int_x86_mmx_psrai_w, - SchedWriteVecShift.MMX>; + SchedWriteVecShift.MMX, + SchedWriteVecShiftImm.MMX>; defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", int_x86_mmx_psra_d, 
int_x86_mmx_psrai_d, - SchedWriteVecShift.MMX>; + SchedWriteVecShift.MMX, + SchedWriteVecShiftImm.MMX>; // Comparison Instructions defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 807a26671ad..d668963bb22 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -3396,6 +3396,7 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm, string OpcodeStr, SDNode OpNode, SDNode OpNode2, RegisterClass RC, X86FoldableSchedWrite sched, + X86FoldableSchedWrite schedImm, ValueType DstVT, ValueType SrcVT, PatFrag ld_frag, bit Is2Addr = 1> { // src2 is always 128-bit @@ -3420,25 +3421,28 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))]>, - Sched<[sched]>; + Sched<[schedImm]>; } multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm, string OpcodeStr, SDNode OpNode, SDNode OpNode2, ValueType DstVT128, ValueType DstVT256, ValueType SrcVT, - X86SchedWriteWidths sched, Predicate prd> { + X86SchedWriteWidths sched, + X86SchedWriteWidths schedImm, Predicate prd> { let Predicates = [HasAVX, prd] in defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr), - OpNode, OpNode2, VR128, sched.XMM, DstVT128, - SrcVT, loadv2i64, 0>, VEX_4V, VEX_WIG; + OpNode, OpNode2, VR128, sched.XMM, schedImm.XMM, + DstVT128, SrcVT, loadv2i64, 0>, VEX_4V, VEX_WIG; let Predicates = [HasAVX2, prd] in defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr), - OpNode, OpNode2, VR256, sched.YMM, DstVT256, - SrcVT, loadv2i64, 0>, VEX_4V, VEX_L, VEX_WIG; + OpNode, OpNode2, VR256, sched.YMM, schedImm.YMM, + DstVT256, SrcVT, loadv2i64, 0>, VEX_4V, VEX_L, + VEX_WIG; let Constraints = "$src1 = $dst" 
in defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2, - VR128, sched.XMM, DstVT128, SrcVT, memopv2i64>; + VR128, sched.XMM, schedImm.XMM, DstVT128, SrcVT, + memopv2i64>; } multiclass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr, @@ -3469,25 +3473,30 @@ let Constraints = "$src1 = $dst" in let ExeDomain = SSEPackedInt in { defm PSLLW : PDI_binop_rmi_all<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli, v8i16, v16i16, v8i16, SchedWriteVecShift, - NoVLX_Or_NoBWI>; + SchedWriteVecShiftImm, NoVLX_Or_NoBWI>; defm PSLLD : PDI_binop_rmi_all<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli, - v4i32, v8i32, v4i32, SchedWriteVecShift, NoVLX>; + v4i32, v8i32, v4i32, SchedWriteVecShift, + SchedWriteVecShiftImm, NoVLX>; defm PSLLQ : PDI_binop_rmi_all<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli, - v2i64, v4i64, v2i64, SchedWriteVecShift, NoVLX>; + v2i64, v4i64, v2i64, SchedWriteVecShift, + SchedWriteVecShiftImm, NoVLX>; defm PSRLW : PDI_binop_rmi_all<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli, v8i16, v16i16, v8i16, SchedWriteVecShift, - NoVLX_Or_NoBWI>; + SchedWriteVecShiftImm, NoVLX_Or_NoBWI>; defm PSRLD : PDI_binop_rmi_all<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli, - v4i32, v8i32, v4i32, SchedWriteVecShift, NoVLX>; + v4i32, v8i32, v4i32, SchedWriteVecShift, + SchedWriteVecShiftImm, NoVLX>; defm PSRLQ : PDI_binop_rmi_all<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli, - v2i64, v4i64, v2i64, SchedWriteVecShift, NoVLX>; + v2i64, v4i64, v2i64, SchedWriteVecShift, + SchedWriteVecShiftImm, NoVLX>; defm PSRAW : PDI_binop_rmi_all<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai, v8i16, v16i16, v8i16, SchedWriteVecShift, - NoVLX_Or_NoBWI>; + SchedWriteVecShiftImm, NoVLX_Or_NoBWI>; defm PSRAD : PDI_binop_rmi_all<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, - v4i32, v8i32, v4i32, SchedWriteVecShift, NoVLX>; + v4i32, v8i32, v4i32, SchedWriteVecShift, + SchedWriteVecShiftImm, NoVLX>; defm PSLLDQ : PDI_binop_ri_all<0x73, MRM7r, "pslldq", X86vshldq, 
SchedWriteShuffle>; diff --git a/llvm/lib/Target/X86/X86InstrXOP.td b/llvm/lib/Target/X86/X86InstrXOP.td index 64a03a274de..dd56759b954 100644 --- a/llvm/lib/Target/X86/X86InstrXOP.td +++ b/llvm/lib/Target/X86/X86InstrXOP.td @@ -155,10 +155,14 @@ multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode, } let ExeDomain = SSEPackedInt in { - defm VPROTB : xop3opimm<0xC0, "vprotb", X86vrotli, v16i8, SchedWriteVecShift.XMM>; - defm VPROTD : xop3opimm<0xC2, "vprotd", X86vrotli, v4i32, SchedWriteVecShift.XMM>; - defm VPROTQ : xop3opimm<0xC3, "vprotq", X86vrotli, v2i64, SchedWriteVecShift.XMM>; - defm VPROTW : xop3opimm<0xC1, "vprotw", X86vrotli, v8i16, SchedWriteVecShift.XMM>; + defm VPROTB : xop3opimm<0xC0, "vprotb", X86vrotli, v16i8, + SchedWriteVecShiftImm.XMM>; + defm VPROTD : xop3opimm<0xC2, "vprotd", X86vrotli, v4i32, + SchedWriteVecShiftImm.XMM>; + defm VPROTQ : xop3opimm<0xC3, "vprotq", X86vrotli, v2i64, + SchedWriteVecShiftImm.XMM>; + defm VPROTW : xop3opimm<0xC1, "vprotw", X86vrotli, v8i16, + SchedWriteVecShiftImm.XMM>; } // Instruction where second source can be memory, but third must be register diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 01a92dc8ffe..af7f2acf0ce 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -203,7 +203,6 @@ defm : BWWriteResPair<WriteVecALU, [BWPort15], 1, [1], 1, 5>; // Vector intege defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM). defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor. defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM/ZMM). -defm : BWWriteResPair<WriteVecShift, [BWPort0], 1>; // Vector integer shifts. defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5, [1], 1, 5>; // Vector integer multiply. 
defm : BWWriteResPair<WriteVecIMulY, [BWPort0], 5, [1], 1, 6>; // Vector integer multiply. defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // Vector PMULLD. @@ -222,6 +221,17 @@ defm : BWWriteResPair<WritePSADBW, [BWPort0], 5, [1], 1, 5>; // Vector PSADBW. defm : BWWriteResPair<WritePSADBWY, [BWPort0], 5, [1], 1, 6>; // Vector PSADBW (YMM/ZMM). defm : BWWriteResPair<WritePHMINPOS, [BWPort0], 5>; // Vector PHMINPOS. +// Vector integer shifts. +defm : BWWriteResPair<WriteVecShift, [BWPort0], 1, [1], 1, 5>; +defm : BWWriteResPair<WriteVecShiftX, [BWPort0,BWPort5], 2, [1,1], 2, 5>; +defm : X86WriteRes<WriteVecShiftY, [BWPort0,BWPort5], 4, [1,1], 2>; +defm : X86WriteRes<WriteVecShiftYLd, [BWPort0,BWPort23], 7, [1,1], 2>; + +defm : BWWriteResPair<WriteVecShiftImmX, [BWPort0], 1, [1], 1, 5>; // Vector integer immediate shifts (XMM). +defm : BWWriteResPair<WriteVecShiftImmY, [BWPort0], 1, [1], 1, 6>; // Vector integer immediate shifts (YMM/ZMM). +defm : BWWriteResPair<WriteVarVecShift, [BWPort0, BWPort5], 3, [2,1], 3, 5>; // Variable vector shifts. +defm : BWWriteResPair<WriteVarVecShiftY, [BWPort0, BWPort5], 3, [2,1], 3, 6>; // Variable vector shifts (YMM/ZMM). + // Vector insert/extract operations. def : WriteRes<WriteVecInsert, [BWPort5]> { let Latency = 2; @@ -347,7 +357,6 @@ defm : BWWriteResPair<WriteFShuffle256, [BWPort5], 3, [1], 1, 7>; // Fp 256-bit defm : BWWriteResPair<WriteFVarShuffle256, [BWPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles. defm : BWWriteResPair<WriteShuffle256, [BWPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles. defm : BWWriteResPair<WriteVarShuffle256, [BWPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles. -defm : BWWriteResPair<WriteVarVecShift, [BWPort0, BWPort5], 2, [2, 1]>; // Variable vector shifts. // Old microcoded instructions that nobody use. 
def : WriteRes<WriteMicrocoded, [BWPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite; @@ -545,14 +554,6 @@ def BWWriteResGroup15 : SchedWriteRes<[BWPort0,BWPort5]> { def: InstRW<[BWWriteResGroup15], (instregex "VCVTPH2PS(Y?)rr", "(V?)CVTPS2PDrr", "(V?)CVTSS2SDrr", - "(V?)PSLLDrr", - "(V?)PSLLQrr", - "(V?)PSLLWrr", - "(V?)PSRADrr", - "(V?)PSRAWrr", - "(V?)PSRLDrr", - "(V?)PSRLQrr", - "(V?)PSRLWrr", "(V?)PTESTrr")>; def BWWriteResGroup16 : SchedWriteRes<[BWPort6,BWPort0156]> { @@ -676,15 +677,6 @@ def: InstRW<[BWWriteResGroup30], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr, XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr, XCHG16ar, XCHG32ar, XCHG64ar)>; -def BWWriteResGroup31 : SchedWriteRes<[BWPort0,BWPort5]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[BWWriteResGroup31], (instregex "VPSLLVD(Y?)rr", - "VPSRAVD(Y?)rr", - "VPSRLVD(Y?)rr")>; - def BWWriteResGroup33 : SchedWriteRes<[BWPort5,BWPort0156]> { let Latency = 3; let NumMicroOps = 3; @@ -757,14 +749,6 @@ def BWWriteResGroup40 : SchedWriteRes<[BWPort0,BWPort5]> { let ResourceCycles = [1,1]; } def: InstRW<[BWWriteResGroup40], (instregex "VCVTPS2PDYrr", - "VPSLLDYrr", - "VPSLLQYrr", - "VPSLLWYrr", - "VPSRADYrr", - "VPSRAWYrr", - "VPSRLDYrr", - "VPSRLQYrr", - "VPSRLWYrr", "VPTESTYrr")>; def BWWriteResGroup41 : SchedWriteRes<[BWPort0,BWPort0156]> { @@ -1066,16 +1050,8 @@ def BWWriteResGroup73 : SchedWriteRes<[BWPort0,BWPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup73], (instregex "VPSLLDYrm", - "VPSLLQYrm", - "VPSLLVQYrm", - "VPSLLWYrm", - "VPSRADYrm", - "VPSRAWYrm", - "VPSRLDYrm", - "VPSRLQYrm", +def: InstRW<[BWWriteResGroup73], (instregex "VPSLLVQYrm", "VPSRLVQYrm", - "VPSRLWYrm", "VTESTPDYrm", "VTESTPSYrm")>; @@ -1122,15 +1098,7 @@ def BWWriteResGroup81 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[BWWriteResGroup81], (instregex "(V?)PSLLDrm", - 
"(V?)PSLLQrm", - "(V?)PSLLWrm", - "(V?)PSRADrm", - "(V?)PSRAWrm", - "(V?)PSRLDrm", - "(V?)PSRLQrm", - "(V?)PSRLWrm", - "(V?)PTESTrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "(V?)PTESTrm")>; def BWWriteResGroup82 : SchedWriteRes<[BWPort0,BWPort01,BWPort23]> { let Latency = 7; @@ -1233,15 +1201,6 @@ def: InstRW<[BWWriteResGroup94], (instregex "VMASKMOVPDYrm", "VPMASKMOVDYrm", "VPMASKMOVQYrm")>; -def BWWriteResGroup95 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> { - let Latency = 8; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[BWWriteResGroup95], (instregex "VPSLLVDrm", - "VPSRAVDrm", - "VPSRLVDrm")>; - def BWWriteResGroup97 : SchedWriteRes<[BWPort23,BWPort237,BWPort06,BWPort0156]> { let Latency = 8; let NumMicroOps = 5; @@ -1359,15 +1318,6 @@ def BWWriteResGroup108 : SchedWriteRes<[BWPort5,BWPort23,BWPort015]> { def: InstRW<[BWWriteResGroup108], (instregex "VPBROADCASTB(Y?)rm", "VPBROADCASTW(Y?)rm")>; -def BWWriteResGroup109 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[BWWriteResGroup109], (instregex "VPSLLVDYrm", - "VPSRAVDYrm", - "VPSRLVDYrm")>; - def BWWriteResGroup111 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort0156]> { let Latency = 9; let NumMicroOps = 4; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 085034bbd5f..11ce9e9687d 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -194,7 +194,6 @@ def : WriteRes<WriteVecStore, [HWPort237, HWPort4]>; def : WriteRes<WriteVecLoad, [HWPort23]> { let Latency = 5; } def : WriteRes<WriteVecMove, [HWPort015]>; -defm : HWWriteResPair<WriteVecShift, [HWPort0], 1>; defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>; defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>; defm : HWWriteResPair<WriteVecALU, [HWPort15], 1, [1], 1, 6>; @@ -213,13 +212,23 @@ defm : 
HWWriteResPair<WriteShuffle256, [HWPort5], 3, [1], 1, 7>; defm : HWWriteResPair<WriteVarShuffle256, [HWPort5], 3, [1], 1, 7>; defm : HWWriteResPair<WriteVarBlend, [HWPort5], 2, [2], 2, 6>; defm : HWWriteResPair<WriteVarBlendY, [HWPort5], 2, [2], 2, 7>; -defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 2, [2, 1]>; defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 7, [1, 2], 3, 6>; defm : HWWriteResPair<WriteMPSADY, [HWPort0, HWPort5], 7, [1, 2], 3, 7>; defm : HWWriteResPair<WritePSADBW, [HWPort0], 5, [1], 1, 6>; defm : HWWriteResPair<WritePSADBWY, [HWPort0], 5, [1], 1, 7>; defm : HWWriteResPair<WritePHMINPOS, [HWPort0], 5, [1], 1, 6>; +// Vector integer shifts. +defm : HWWriteResPair<WriteVecShift, [HWPort0], 1, [1], 1, 5>; +defm : HWWriteResPair<WriteVecShiftX, [HWPort0,HWPort5], 2, [1,1], 2, 6>; +defm : X86WriteRes<WriteVecShiftY, [HWPort0,HWPort5], 4, [1,1], 2>; +defm : X86WriteRes<WriteVecShiftYLd, [HWPort0,HWPort23], 8, [1,1], 2>; + +defm : HWWriteResPair<WriteVecShiftImmX, [HWPort0], 1, [1], 1, 6>; +defm : HWWriteResPair<WriteVecShiftImmY, [HWPort0], 1, [1], 1, 7>; +defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 3, [2,1], 3, 6>; +defm : HWWriteResPair<WriteVarVecShiftY, [HWPort0, HWPort5], 3, [2,1], 3, 7>; + // Vector insert/extract operations. def : WriteRes<WriteVecInsert, [HWPort5]> { let Latency = 2; @@ -834,16 +843,8 @@ def HWWriteResGroup11_2 : SchedWriteRes<[HWPort0,HWPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup11_2], (instregex "VPSLLDYrm", - "VPSLLQYrm", - "VPSLLVQYrm", - "VPSLLWYrm", - "VPSRADYrm", - "VPSRAWYrm", - "VPSRLDYrm", - "VPSRLQYrm", +def: InstRW<[HWWriteResGroup11_2], (instregex "VPSLLVQYrm", "VPSRLVQYrm", - "VPSRLWYrm", "VTESTPDYrm", "VTESTPSYrm")>; @@ -943,12 +944,12 @@ def: InstRW<[HWWriteResGroup16], (instregex "ANDN(32|64)rm", "BLSMSK(32|64)rm", "BLSR(32|64)rm", "MOVBE(16|32|64)rm", - "MMX_PABS(B|D|W)rm",
- "MMX_P(ADD|SUB)(B|D|W|Q)irm",
- "MMX_P(ADD|SUB)(U?)S(B|W)irm",
- "MMX_PAVG(B|W)irm",
- "MMX_PCMP(EQ|GT)(B|D|W)irm",
- "MMX_P(MAX|MIN)(SW|UB)irm",
+ "MMX_PABS(B|D|W)rm", + "MMX_P(ADD|SUB)(B|D|W|Q)irm", + "MMX_P(ADD|SUB)(U?)S(B|W)irm", + "MMX_PAVG(B|W)irm", + "MMX_PCMP(EQ|GT)(B|D|W)irm", + "MMX_P(MAX|MIN)(SW|UB)irm", "MMX_PSIGN(B|D|W)rm")>; def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort015]> { @@ -1082,14 +1083,6 @@ def: InstRW<[HWWriteResGroup31], (instregex "VCVTPH2PSYrr", "VCVTPH2PSrr", "(V?)CVTPS2PDrr", "(V?)CVTSS2SDrr", - "(V?)PSLLDrr", - "(V?)PSLLQrr", - "(V?)PSLLWrr", - "(V?)PSRADrr", - "(V?)PSRAWrr", - "(V?)PSRLDrr", - "(V?)PSRLQrr", - "(V?)PSRLWrr", "(V?)PTESTrr")>; def HWWriteResGroup32 : SchedWriteRes<[HWPort6,HWPort0156]> { @@ -1176,15 +1169,7 @@ def HWWriteResGroup38 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[HWWriteResGroup38], (instregex "(V?)PSLLDrm", - "(V?)PSLLQrm", - "(V?)PSLLWrm", - "(V?)PSRADrm", - "(V?)PSRAWrm", - "(V?)PSRLDrm", - "(V?)PSRLQrm", - "(V?)PSRLWrm", - "(V?)PTESTrm")>; +def: InstRW<[HWWriteResGroup38], (instregex "(V?)PTESTrm")>; def HWWriteResGroup39 : SchedWriteRes<[HWPort0,HWPort01,HWPort23]> { let Latency = 7; @@ -1338,15 +1323,6 @@ def: InstRW<[HWWriteResGroup54], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr, XCHG8rr, XCHG16rr, XCHG32rr, XCHG64rr, XCHG16ar, XCHG32ar, XCHG64ar)>; -def HWWriteResGroup55 : SchedWriteRes<[HWPort0,HWPort5]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[HWWriteResGroup55], (instregex "VPSLLVD(Y?)rr", - "VPSRAVD(Y?)rr", - "VPSRLVD(Y?)rr")>; - def HWWriteResGroup57 : SchedWriteRes<[HWPort5,HWPort0156]> { let Latency = 3; let NumMicroOps = 3; @@ -1400,24 +1376,6 @@ def HWWriteResGroup62 : SchedWriteRes<[HWPort1,HWPort4,HWPort237]> { def: InstRW<[HWWriteResGroup62], (instregex "IST(T?)_FP(16|32|64)m", "IST_F(16|32)m")>; -def HWWriteResGroup63 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> { - let Latency = 10; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[HWWriteResGroup63], (instregex "VPSLLVDYrm", - 
"VPSRAVDYrm", - "VPSRLVDYrm")>; - -def HWWriteResGroup63_1 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[HWWriteResGroup63_1], (instregex "VPSLLVDrm", - "VPSRAVDrm", - "VPSRLVDrm")>; - def HWWriteResGroup64 : SchedWriteRes<[HWPort5,HWPort23,HWPort15]> { let Latency = 8; let NumMicroOps = 4; @@ -1491,14 +1449,6 @@ def HWWriteResGroup71 : SchedWriteRes<[HWPort0,HWPort5]> { let ResourceCycles = [1,1]; } def: InstRW<[HWWriteResGroup71], (instregex "VCVTPS2PDYrr", - "VPSLLDYrr", - "VPSLLQYrr", - "VPSLLWYrr", - "VPSRADYrr", - "VPSRAWYrr", - "VPSRLDYrr", - "VPSRLQYrr", - "VPSRLWYrr", "VPTESTYrr")>; def HWWriteResGroup72 : SchedWriteRes<[HWPort0,HWPort0156]> { diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 2d4985fcd4f..876afdb5a6b 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -174,7 +174,6 @@ def : WriteRes<WriteVecStore, [SBPort23, SBPort4]>; def : WriteRes<WriteVecLoad, [SBPort23]> { let Latency = 6; } def : WriteRes<WriteVecMove, [SBPort05]>; -defm : SBWriteResPair<WriteVecShift, [SBPort5], 1>; defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>; defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>; defm : SBWriteResPair<WriteVecALU, [SBPort15], 1, [1], 1, 6>; @@ -197,6 +196,15 @@ defm : SBWriteResPair<WritePSADBW, [SBPort0], 5, [1], 1, 6>; defm : SBWriteResPair<WritePSADBWY, [SBPort0], 5, [1], 1, 7>; defm : SBWriteResPair<WritePHMINPOS, [SBPort0], 5, [1], 1, 6>; +// Vector integer shifts. 
+defm : SBWriteResPair<WriteVecShift, [SBPort5], 1, [1], 1, 5>; +defm : SBWriteResPair<WriteVecShiftX, [SBPort0,SBPort15], 2, [1,1], 2, 6>; +defm : SBWriteResPair<WriteVecShiftY, [SBPort0,SBPort15], 4, [1,1], 2, 7>; +defm : SBWriteResPair<WriteVecShiftImmX, [SBPort0], 1, [1], 1, 6>; +defm : SBWriteResPair<WriteVecShiftImmY, [SBPort0], 1, [1], 1, 7>; +defm : SBWriteResPair<WriteVarVecShift, [SBPort0], 1, [1], 1, 6>; +defm : SBWriteResPair<WriteVarVecShiftY, [SBPort0], 1, [1], 1, 7>; + // Vector insert/extract operations. def : WriteRes<WriteVecInsert, [SBPort5,SBPort15]> { let Latency = 2; @@ -336,7 +344,6 @@ defm : SBWriteResPair<WriteFShuffle256, [SBPort5], 1, [1], 1, 7>; defm : SBWriteResPair<WriteFVarShuffle256, [SBPort5], 1, [1], 1, 7>; defm : SBWriteResPair<WriteShuffle256, [SBPort5], 1, [1], 1, 7>; defm : SBWriteResPair<WriteVarShuffle256, [SBPort5], 1, [1], 1, 7>; -defm : SBWriteResPair<WriteVarVecShift, [SBPort0], 1>; defm : SBWriteResPair<WriteFMA, [SBPort01], 5>; defm : SBWriteResPair<WriteFMAS, [SBPort01], 5>; defm : SBWriteResPair<WriteFMAY, [SBPort01], 5>; @@ -349,14 +356,6 @@ def SBWriteResGroup0 : SchedWriteRes<[SBPort0]> { let ResourceCycles = [1]; } def: InstRW<[SBWriteResGroup0], (instregex "(V?)CVTSS2SDrr", - "(V?)PSLLDri", - "(V?)PSLLQri", - "(V?)PSLLWri", - "(V?)PSRADri", - "(V?)PSRAWri", - "(V?)PSRLDri", - "(V?)PSRLQri", - "(V?)PSRLWri", "VTESTPD(Y?)rr", "VTESTPS(Y?)rr")>; @@ -496,20 +495,6 @@ def SBWriteResGroup13 : SchedWriteRes<[SBPort0,SBPort5]> { def: InstRW<[SBWriteResGroup13], (instregex "(V?)CVTPS2PD(Y?)rr", "(V?)PTEST(Y?)rr")>; -def SBWriteResGroup14 : SchedWriteRes<[SBPort0,SBPort15]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup14], (instregex "(V?)PSLLDrr", - "(V?)PSLLQrr", - "(V?)PSLLWrr", - "(V?)PSRADrr", - "(V?)PSRAWrr", - "(V?)PSRLDrr", - "(V?)PSRLQrr", - "(V?)PSRLWrr")>; - def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> { let Latency = 2; let NumMicroOps = 
2; @@ -1073,20 +1058,6 @@ def SBWriteResGroup78 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> { } def: InstRW<[SBWriteResGroup78], (instregex "(V?)PTESTrm")>; -def SBWriteResGroup79 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup79], (instregex "(V?)PSLLDrm", - "(V?)PSLLQrm", - "(V?)PSLLWrm", - "(V?)PSRADrm", - "(V?)PSRAWrm", - "(V?)PSRLDrm", - "(V?)PSRLQrm", - "(V?)PSRLWrm")>; - def SBWriteResGroup80 : SchedWriteRes<[SBPort23,SBPort15]> { let Latency = 8; let NumMicroOps = 4; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 402ae1f2d67..0ef7938e2ae 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -199,7 +199,6 @@ defm : SKLWriteResPair<WriteVecALU, [SKLPort01], 1, [1], 1, 6>; // Vector inte defm : SKLWriteResPair<WriteVecALUY, [SKLPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM). defm : SKLWriteResPair<WriteVecLogic, [SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor. defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM). -defm : SKLWriteResPair<WriteVecShift, [SKLPort0], 1>; // Vector integer shifts. defm : SKLWriteResPair<WriteVecIMul, [SKLPort01], 4, [1], 1, 6>; // Vector integer multiply. defm : SKLWriteResPair<WriteVecIMulY, [SKLPort01], 4, [1], 1, 7>; // Vector integer multiply (YMM/ZMM). defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>; // Vector PMULLD. @@ -218,6 +217,17 @@ defm : SKLWriteResPair<WritePSADBW, [SKLPort5], 3, [1], 1, 6>; // Vector PSADBW defm : SKLWriteResPair<WritePSADBWY, [SKLPort5], 3, [1], 1, 7>; // Vector PSADBW. defm : SKLWriteResPair<WritePHMINPOS, [SKLPort01], 4, [1], 1, 6>; // Vector PHMINPOS. +// Vector integer shifts. 
+defm : SKLWriteResPair<WriteVecShift, [SKLPort0], 1, [1], 1, 5>; +defm : SKLWriteResPair<WriteVecShiftX, [SKLPort5,SKLPort01], 2, [1,1], 2, 6>; +defm : X86WriteRes<WriteVecShiftY, [SKLPort5,SKLPort01], 4, [1,1], 2>; +defm : X86WriteRes<WriteVecShiftYLd, [SKLPort01,SKLPort23], 8, [1,1], 2>; + +defm : SKLWriteResPair<WriteVecShiftImmX, [SKLPort01], 1, [1], 1, 6>; // Vector integer immediate shifts (XMM). +defm : SKLWriteResPair<WriteVecShiftImmY, [SKLPort01], 1, [1], 1, 7>; // Vector integer immediate shifts (YMM/ZMM). +defm : SKLWriteResPair<WriteVarVecShift, [SKLPort01], 1, [1], 1, 6>; // Variable vector shifts. +defm : SKLWriteResPair<WriteVarVecShiftY, [SKLPort01], 1, [1], 1, 7>; // Variable vector shifts (YMM/ZMM). + // Vector insert/extract operations. def : WriteRes<WriteVecInsert, [SKLPort5]> { let Latency = 2; @@ -353,7 +363,6 @@ defm : SKLWriteResPair<WriteFShuffle256, [SKLPort5], 3, [1], 1, 7>; // Fp 256-bi defm : SKLWriteResPair<WriteFVarShuffle256, [SKLPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles. defm : SKLWriteResPair<WriteShuffle256, [SKLPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles. defm : SKLWriteResPair<WriteVarShuffle256, [SKLPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles. -defm : SKLWriteResPair<WriteVarVecShift, [SKLPort0, SKLPort5], 2, [2, 1]>; // Variable vector shifts. // Old microcoded instructions that nobody use. 
def : WriteRes<WriteMicrocoded, [SKLPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite; @@ -426,25 +435,6 @@ def SKLWriteResGroup4 : SchedWriteRes<[SKLPort6]> { } def: InstRW<[SKLWriteResGroup4], (instregex "JMP(16|32|64)r")>; -def SKLWriteResGroup5 : SchedWriteRes<[SKLPort01]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SKLWriteResGroup5], (instregex "(V?)PSLLD(Y?)ri", - "(V?)PSLLQ(Y?)ri", - "VPSLLVD(Y?)rr", - "VPSLLVQ(Y?)rr", - "(V?)PSLLW(Y?)ri", - "(V?)PSRAD(Y?)ri", - "VPSRAVD(Y?)rr", - "(V?)PSRAW(Y?)ri", - "(V?)PSRLD(Y?)ri", - "(V?)PSRLQ(Y?)ri", - "VPSRLVD(Y?)rr", - "VPSRLVQ(Y?)rr", - "(V?)PSRLW(Y?)ri")>; - def SKLWriteResGroup6 : SchedWriteRes<[SKLPort05]> { let Latency = 1; let NumMicroOps = 1; @@ -619,20 +609,6 @@ def: InstRW<[SKLWriteResGroup18], (instregex "VMASKMOVPD(Y?)mr", "VPMASKMOVD(Y?)mr", "VPMASKMOVQ(Y?)mr")>; -def SKLWriteResGroup19 : SchedWriteRes<[SKLPort5,SKLPort01]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup19], (instregex "(V?)PSLLDrr", - "(V?)PSLLQrr", - "(V?)PSLLWrr", - "(V?)PSRADrr", - "(V?)PSRAWrr", - "(V?)PSRLDrr", - "(V?)PSRLQrr", - "(V?)PSRLWrr")>; - def SKLWriteResGroup20 : SchedWriteRes<[SKLPort6,SKLPort0156]> { let Latency = 2; let NumMicroOps = 2; @@ -895,20 +871,6 @@ def SKLWriteResGroup51_16 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { } def: InstRW<[SKLWriteResGroup51_16], (instrs IMUL16r, MUL16r)>; -def SKLWriteResGroup52 : SchedWriteRes<[SKLPort5,SKLPort01]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup52], (instregex "VPSLLDYrr", - "VPSLLQYrr", - "VPSLLWYrr", - "VPSRADYrr", - "VPSRAWYrr", - "VPSRLDYrr", - "VPSRLQYrr", - "VPSRLWYrr")>; - def SKLWriteResGroup53 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> { let Latency = 4; let NumMicroOps = 3; @@ -1263,16 +1225,11 @@ def SKLWriteResGroup90 : SchedWriteRes<[SKLPort01,SKLPort23]> { 
} def: InstRW<[SKLWriteResGroup90], (instregex "(V?)PSLLDrm", "(V?)PSLLQrm", - "VPSLLVDrm", - "VPSLLVQrm", "(V?)PSLLWrm", "(V?)PSRADrm", - "VPSRAVDrm", "(V?)PSRAWrm", "(V?)PSRLDrm", "(V?)PSRLQrm", - "(V?)PSRLVDrm", - "VPSRLVQrm", "(V?)PSRLWrm")>; def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> { @@ -1431,25 +1388,6 @@ def: InstRW<[SKLWriteResGroup108], (instregex "FCOM32m", "VPMOVSXBQYrm", "VPMOVSXWQYrm")>; -def SKLWriteResGroup109 : SchedWriteRes<[SKLPort01,SKLPort23]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup109], (instregex "VPSLLDYrm", - "VPSLLQYrm", - "VPSLLVDYrm", - "VPSLLVQYrm", - "VPSLLWYrm", - "VPSRADYrm", - "VPSRAVDYrm", - "VPSRAWYrm", - "VPSRLDYrm", - "VPSRLQYrm", - "VPSRLVDYrm", - "VPSRLVQYrm", - "VPSRLWYrm")>; - def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> { let Latency = 8; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index b1bd040f271..41b75824777 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -199,7 +199,6 @@ defm : SKXWriteResPair<WriteVecALU, [SKXPort01], 1, [1], 1, 6>; // Vector inte defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM). defm : SKXWriteResPair<WriteVecLogic, [SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor. defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM). -defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1>; // Vector integer shifts. defm : SKXWriteResPair<WriteVecIMul, [SKXPort015], 4, [1], 1, 6>; // Vector integer multiply. defm : SKXWriteResPair<WriteVecIMulY, [SKXPort015], 4, [1], 1, 7>; // Vector integer multiply (YMM/ZMM). defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector PMULLD. 
@@ -218,6 +217,18 @@ defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1], 1, 6>; // Vector PSADBW defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>; // Vector PSADBW. defm : SKXWriteResPair<WritePHMINPOS, [SKXPort015], 4, [1], 1, 6>; // Vector PHMINPOS. +// Vector integer shifts. +defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1, [1], 1, 5>; +defm : X86WriteRes<WriteVecShiftX, [SKXPort5,SKXPort01], 2, [1,1], 2>; +defm : X86WriteRes<WriteVecShiftY, [SKXPort5,SKXPort01], 4, [1,1], 2>; +defm : X86WriteRes<WriteVecShiftXLd, [SKXPort01,SKXPort23], 7, [1,1], 2>; +defm : X86WriteRes<WriteVecShiftYLd, [SKXPort01,SKXPort23], 8, [1,1], 2>; + +defm : SKXWriteResPair<WriteVecShiftImmX, [SKXPort01], 1, [1], 1, 6>; // Vector integer immediate shifts (XMM). +defm : SKXWriteResPair<WriteVecShiftImmY, [SKXPort01], 1, [1], 1, 7>; // Vector integer immediate shifts (YMM/ZMM). +defm : SKXWriteResPair<WriteVarVecShift, [SKXPort01], 1, [1], 1, 6>; // Variable vector shifts. +defm : SKXWriteResPair<WriteVarVecShiftY, [SKXPort01], 1, [1], 1, 7>; // Variable vector shifts (YMM/ZMM). + // Vector insert/extract operations. def : WriteRes<WriteVecInsert, [SKXPort5]> { let Latency = 2; @@ -353,7 +364,6 @@ defm : SKXWriteResPair<WriteFShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bi defm : SKXWriteResPair<WriteFVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles. defm : SKXWriteResPair<WriteShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles. defm : SKXWriteResPair<WriteVarShuffle256, [SKXPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles. -defm : SKXWriteResPair<WriteVarVecShift, [SKXPort0, SKXPort5], 2, [2, 1]>; // Variable vector shifts. // Old microcoded instructions that nobody use. 
def : WriteRes<WriteMicrocoded, [SKXPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite; @@ -474,116 +484,6 @@ def SKXWriteResGroup4 : SchedWriteRes<[SKXPort6]> { } def: InstRW<[SKXWriteResGroup4], (instregex "JMP(16|32|64)r")>; -def SKXWriteResGroup5 : SchedWriteRes<[SKXPort01]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SKXWriteResGroup5], (instregex "VPROLDZ128ri", - "VPROLDZ256ri", - "VPROLDZri", - "VPROLQZ128ri", - "VPROLQZ256ri", - "VPROLQZri", - "VPROLVDZ128rr", - "VPROLVDZ256rr", - "VPROLVDZrr", - "VPROLVQZ128rr", - "VPROLVQZ256rr", - "VPROLVQZrr", - "VPRORDZ128ri", - "VPRORDZ256ri", - "VPRORDZri", - "VPRORQZ128ri", - "VPRORQZ256ri", - "VPRORQZri", - "VPRORVDZ128rr", - "VPRORVDZ256rr", - "VPRORVDZrr", - "VPRORVQZ128rr", - "VPRORVQZ256rr", - "VPRORVQZrr", - "(V?)PSLLDYri", - "VPSLLDZ128ri", - "VPSLLDZ256ri", - "VPSLLDZri", - "(V?)PSLLDri", - "VPSLLQYri", - "VPSLLQZ128ri", - "VPSLLQZ256ri", - "VPSLLQZri", - "(V?)PSLLQri", - "VPSLLVDYrr", - "VPSLLVDZ128rr", - "VPSLLVDZ256rr", - "VPSLLVDZrr", - "VPSLLVDrr", - "VPSLLVQYrr", - "VPSLLVQZ128rr", - "VPSLLVQZ256rr", - "VPSLLVQZrr", - "VPSLLVQrr", - "VPSLLVWZ128rr", - "VPSLLVWZ256rr", - "VPSLLVWZrr", - "VPSLLWYri", - "VPSLLWZ128ri", - "VPSLLWZ256ri", - "VPSLLWZri", - "(V?)PSLLWri", - "VPSRADYri", - "VPSRADZ128ri", - "VPSRADZ256ri", - "VPSRADZri", - "(V?)PSRADri", - "VPSRAQZ128ri", - "VPSRAQZ256ri", - "VPSRAQZri", - "VPSRAVDYrr", - "VPSRAVDZ128rr", - "VPSRAVDZ256rr", - "VPSRAVDZrr", - "VPSRAVDrr", - "VPSRAVQZ128rr", - "VPSRAVQZ256rr", - "VPSRAVQZrr", - "VPSRAVWZ128rr", - "VPSRAVWZ256rr", - "VPSRAVWZrr", - "VPSRAWYri", - "VPSRAWZ128ri", - "VPSRAWZ256ri", - "VPSRAWZri", - "(V?)PSRAWri", - "VPSRLDYri", - "VPSRLDZ128ri", - "VPSRLDZ256ri", - "VPSRLDZri", - "(V?)PSRLDri", - "VPSRLQYri", - "VPSRLQZ128ri", - "VPSRLQZ256ri", - "VPSRLQZri", - "(V?)PSRLQri", - "VPSRLVDYrr", - "VPSRLVDZ128rr", - "VPSRLVDZ256rr", - "VPSRLVDZrr", - "VPSRLVDrr", - "VPSRLVQYrr", - 
"VPSRLVQZ128rr", - "VPSRLVQZ256rr", - "VPSRLVQZrr", - "VPSRLVQrr", - "VPSRLVWZ128rr", - "VPSRLVWZ256rr", - "VPSRLVWZrr", - "VPSRLWYri", - "VPSRLWZ128ri", - "VPSRLWZ256ri", - "VPSRLWZri", - "(V?)PSRLWri")>; - def SKXWriteResGroup6 : SchedWriteRes<[SKXPort05]> { let Latency = 1; let NumMicroOps = 1; @@ -915,28 +815,6 @@ def: InstRW<[SKXWriteResGroup18], (instregex "VMASKMOVPDYmr", "VPMASKMOVQYmr", "VPMASKMOVQmr")>; -def SKXWriteResGroup19 : SchedWriteRes<[SKXPort5,SKXPort01]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKXWriteResGroup19], (instregex "VPSLLDZ128rr", - "(V?)PSLLDrr", - "VPSLLQZ128rr", - "(V?)PSLLQrr", - "VPSLLWZ128rr", - "(V?)PSLLWrr", - "VPSRADZ128rr", - "(V?)PSRADrr", - "VPSRAQZ128rr", - "VPSRAWZ128rr", - "(V?)PSRAWrr", - "VPSRLDZ128rr", - "(V?)PSRLDrr", - "VPSRLQZ128rr", - "(V?)PSRLQrr", - "(V?)PSRLWrr")>; - def SKXWriteResGroup20 : SchedWriteRes<[SKXPort6,SKXPort0156]> { let Latency = 2; let NumMicroOps = 2; @@ -1462,38 +1340,6 @@ def SKXWriteResGroup52_16 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { } def: InstRW<[SKXWriteResGroup52_16], (instrs IMUL16r, MUL16r)>; -def SKXWriteResGroup53 : SchedWriteRes<[SKXPort5,SKXPort01]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLDYrr", - "VPSLLDZ256rr", - "VPSLLDZrr", - "VPSLLQYrr", - "VPSLLQZ256rr", - "VPSLLQZrr", - "VPSLLWYrr", - "VPSLLWZ256rr", - "VPSLLWZrr", - "VPSRADYrr", - "VPSRADZ256rr", - "VPSRADZrr", - "VPSRAQZ256rr", - "VPSRAQZrr", - "VPSRAWYrr", - "VPSRAWZ256rr", - "VPSRAWZrr", - "VPSRLDYrr", - "VPSRLDZ256rr", - "VPSRLDZrr", - "VPSRLQYrr", - "VPSRLQZ256rr", - "VPSRLQZrr", - "VPSRLWYrr", - "VPSRLWZ256rr", - "VPSRLWZrr")>; - def SKXWriteResGroup54 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> { let Latency = 4; let NumMicroOps = 3; @@ -2066,59 +1912,6 @@ def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr", "VCVTUQQ2PSZ256rr", "VCVTUQQ2PSZrr")>; -def 
SKXWriteResGroup94 : SchedWriteRes<[SKXPort01,SKXPort23]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKXWriteResGroup94], (instregex "VPROLDZ128m(b?)i", - "VPROLQZ128m(b?)i", - "VPROLVDZ128rm(b?)", - "VPROLVQZ128rm(b?)", - "VPRORDZ128m(b?)i", - "VPRORQZ128m(b?)i", - "VPRORVDZ128rm(b?)", - "VPRORVQZ128rm(b?)", - "VPSLLDZ128m(b?)i", - "VPSLLDZ128rm(b?)", - "(V?)PSLLDrm", - "VPSLLQZ128m(b?)i", - "VPSLLQZ128rm(b?)", - "(V?)PSLLQrm", - "VPSLLVDZ128rm(b?)", - "VPSLLVDrm", - "VPSLLVQZ128rm(b?)", - "VPSLLVQrm", - "VPSLLVWZ128rm(b?)", - "VPSLLWZ128mi(b?)", - "VPSLLWZ128rm(b?)", - "(V?)PSLLWrm", - "VPSRADZ128m(b?)i", - "VPSRADZ128rm(b?)", - "(V?)PSRADrm", - "VPSRAQZ128m(b?)i", - "VPSRAQZ128rm(b?)", - "VPSRAVDZ128rm(b?)", - "VPSRAVDrm", - "VPSRAVQZ128rm(b?)", - "VPSRAVWZ128rm(b?)", - "VPSRAWZ128mi(b?)", - "VPSRAWZ128rm(b?)", - "(V?)PSRAWrm", - "VPSRLDZ128m(b?)i", - "VPSRLDZ128rm(b?)", - "(V?)PSRLDrm", - "VPSRLQZ128m(b?)i", - "VPSRLQZ128rm(b?)", - "(V?)PSRLQrm", - "VPSRLVDZ128rm(b?)", - "VPSRLVDrm", - "VPSRLVQZ128rm(b?)", - "VPSRLVQrm", - "VPSRLVWZ128rm(b?)", - "VPSRLWZ128mi(b?)", - "VPSRLWZ128rm(b?)", - "(V?)PSRLWrm")>; def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> { let Latency = 7; @@ -2406,95 +2199,6 @@ def: InstRW<[SKXWriteResGroup119], (instregex "FCOM32m", "VPMOVSXBQYrm", "VPMOVSXWQYrm")>; -def SKXWriteResGroup120 : SchedWriteRes<[SKXPort01,SKXPort23]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKXWriteResGroup120], (instregex "VPROLDZ256m(b?)i", - "VPROLDZm(b?)i", - "VPROLQZ256m(b?)i", - "VPROLQZm(b?)i", - "VPROLVDZ256rm(b?)", - "VPROLVDZrm(b?)", - "VPROLVQZ256rm(b?)", - "VPROLVQZrm(b?)", - "VPRORDZ256m(b?)i", - "VPRORDZm(b?)i", - "VPRORQZ256m(b?)i", - "VPRORQZm(b?)i", - "VPRORVDZ256rm(b?)", - "VPRORVDZrm(b?)", - "VPRORVQZ256rm(b?)", - "VPRORVQZrm(b?)", - "VPSLLDYrm", - "VPSLLDZ256m(b?)i", - "VPSLLDZ256rm(b?)", - "VPSLLDZm(b?)i", - "VPSLLDZrm(b?)", - 
"VPSLLQYrm", - "VPSLLQZ256m(b?)i", - "VPSLLQZ256rm(b?)", - "VPSLLQZm(b?)i", - "VPSLLQZrm(b?)", - "VPSLLVDYrm", - "VPSLLVDZ256rm(b?)", - "VPSLLVDZrm(b?)", - "VPSLLVQYrm", - "VPSLLVQZ256rm(b?)", - "VPSLLVQZrm(b?)", - "VPSLLVWZ256rm(b?)", - "VPSLLVWZrm(b?)", - "VPSLLWYrm", - "VPSLLWZ256mi(b?)", - "VPSLLWZ256rm(b?)", - "VPSLLWZmi(b?)", - "VPSLLWZrm(b?)", - "VPSRADYrm", - "VPSRADZ256m(b?)i", - "VPSRADZ256rm(b?)", - "VPSRADZm(b?)i", - "VPSRADZrm(b?)", - "VPSRAQZ256m(b?)i", - "VPSRAQZ256rm(b?)", - "VPSRAQZm(b?)i", - "VPSRAQZrm(b?)", - "VPSRAVDYrm", - "VPSRAVDZ256rm(b?)", - "VPSRAVDZrm(b?)", - "VPSRAVQZ256rm(b?)", - "VPSRAVQZrm(b?)", - "VPSRAVWZ256rm(b?)", - "VPSRAVWZrm(b?)", - "VPSRAWYrm", - "VPSRAWZ256mi(b?)", - "VPSRAWZ256rm(b?)", - "VPSRAWZmi(b?)", - "VPSRAWZrm(b?)", - "VPSRLDYrm", - "VPSRLDZ256m(b?)i", - "VPSRLDZ256rm(b?)", - "VPSRLDZm(b?)i", - "VPSRLDZrm(b?)", - "VPSRLQYrm", - "VPSRLQZ256m(b?)i", - "VPSRLQZ256rm(b?)", - "VPSRLQZm(b?)i", - "VPSRLQZrm(b?)", - "VPSRLVDYrm", - "VPSRLVDZ256rm(b?)", - "VPSRLVDZrm(b?)", - "VPSRLVQYrm", - "VPSRLVQZ256rm(b?)", - "VPSRLVQZrm(b?)", - "VPSRLVWZ256rm(b?)", - "VPSRLVWZrm(b?)", - "VPSRLWYrm", - "VPSRLWZ256mi(b?)", - "VPSRLWZ256rm(b?)", - "VPSRLWZmi(b?)", - "VPSRLWZrm(b?)")>; - def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23,SKXPort015]> { let Latency = 8; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 7e984a5e187..7f6a38f6a4f 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -19,6 +19,17 @@ def ReadAfterLd : SchedRead; // load + WriteRMW. def WriteRMW : SchedWrite; +// Helper to set SchedWrite ExePorts/Latency/ResourceCycles/NumMicroOps. 
+multiclass X86WriteRes<SchedWrite SchedRW, + list<ProcResourceKind> ExePorts, + int Lat, list<int> Res, int UOps> { + def : WriteRes<SchedRW, ExePorts> { + let Latency = Lat; + let ResourceCycles = Res; + let NumMicroOps = UOps; + } +} + // Most instructions can fold loads, so almost every SchedWrite comes in two // variants: With and without a folded load. // An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite @@ -137,7 +148,11 @@ defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM). defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals. defm WriteVecLogicY: X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM). -defm WriteVecShift : X86SchedWritePair; // Vector integer shifts. +defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default). +defm WriteVecShiftX : X86SchedWritePair; // Vector integer shifts (XMM). +defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM/ZMM). +defm WriteVecShiftImmX: X86SchedWritePair; // Vector integer immediate shifts (XMM). +defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM/ZMM). defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply. defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM/ZMM). defm WritePMULLD : X86SchedWritePair; // Vector PMULLD. @@ -205,7 +220,8 @@ defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles. defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuffles. defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles. defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles. -defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts. +defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts. 
+defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM/ZMM). // Old microcoded instructions that nobody use. def WriteMicrocoded : SchedWrite; @@ -258,11 +274,14 @@ def SchedWriteVecLogic : X86SchedWriteWidths<WriteVecLogic, WriteVecLogic, WriteVecLogicY, WriteVecLogicY>; def SchedWriteVecShift - : X86SchedWriteWidths<WriteVecShift, WriteVecShift, - WriteVecShift, WriteVecShift>; + : X86SchedWriteWidths<WriteVecShift, WriteVecShiftX, + WriteVecShiftY, WriteVecShiftY>; +def SchedWriteVecShiftImm + : X86SchedWriteWidths<WriteVecShift, WriteVecShiftImmX, + WriteVecShiftImmY, WriteVecShiftImmY>; def SchedWriteVarVecShift : X86SchedWriteWidths<WriteVarVecShift, WriteVarVecShift, - WriteVarVecShift, WriteVarVecShift>; + WriteVarVecShiftY, WriteVarVecShiftY>; def SchedWriteVecIMul : X86SchedWriteWidths<WriteVecIMul, WriteVecIMul, WriteVecIMulY, WriteVecIMulY>; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 26bad496e60..da19ad7d508 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -256,6 +256,10 @@ defm : AtomWriteResPair<WriteVecALUY, [AtomPort01], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteVecLogicY, [AtomPort01], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>; +defm : AtomWriteResPair<WriteVecShiftX, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>; +defm : AtomWriteResPair<WriteVecShiftY, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>; +defm : AtomWriteResPair<WriteVecShiftImmX, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>; +defm : AtomWriteResPair<WriteVecShiftImmY, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>; defm : AtomWriteResPair<WriteVecIMul, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WriteVecIMulY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WritePMULLD, [AtomPort01], 
[AtomPort0], 1, 1>; @@ -276,6 +280,7 @@ defm : AtomWriteResPair<WriteVarBlendY, [AtomPort0], [AtomPort0]>; // NOTE: defm : AtomWriteResPair<WriteShuffle256, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair<WriteVarShuffle256, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair<WriteVarVecShift, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair<WriteVarVecShiftY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. //////////////////////////////////////////////////////////////////////////////// // Vector insert/extract operations. diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index c35a53e54bd..d930ed00d30 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -406,6 +406,10 @@ def : WriteRes<WriteVecMove, [JFPU01, JVALU]>; defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteVecALUY, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteVecShift, [JFPU01, JVALU], 1>; +defm : JWriteResFpuPair<WriteVecShiftX, [JFPU01, JVALU], 1>; +defm : JWriteResFpuPair<WriteVecShiftY, [JFPU01, JVALU], 1>; +defm : JWriteResFpuPair<WriteVecShiftImmX,[JFPU01, JVALU], 1>; +defm : JWriteResFpuPair<WriteVecShiftImmY,[JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteVecIMul, [JFPU0, JVIMUL], 2>; defm : JWriteResFpuPair<WriteVecIMulY, [JFPU0, JVIMUL], 2>; defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>; @@ -428,6 +432,7 @@ defm : JWriteResFpuPair<WriteVecLogicY, [JFPU01, JVALU], 1>; // NOTE: Doesn't defm : JWriteResFpuPair<WriteShuffle256, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteVarShuffle256, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair<WriteVarVecShift, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar. 
+defm : JWriteResFpuPair<WriteVarVecShiftY,[JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar. //////////////////////////////////////////////////////////////////////////////// // Vector insert/extract operations. diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 6e7a010f442..51ced28e901 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -162,7 +162,11 @@ def : WriteRes<WriteVecStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>; def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; } def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>; -defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>; +defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>; +defm : SLMWriteResPair<WriteVecShiftX, [SLM_FPC_RSV0], 1>; +defm : SLMWriteResPair<WriteVecShiftY, [SLM_FPC_RSV0], 1>; +defm : SLMWriteResPair<WriteVecShiftImmX,[SLM_FPC_RSV0], 1>; +defm : SLMWriteResPair<WriteVecShiftImmY,[SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>; @@ -288,7 +292,6 @@ def : WriteRes<WriteCLMulLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> { let ResourceCycles = [10, 1]; } - def : WriteRes<WriteSystem, [SLM_FPC_RSV0]> { let Latency = 100; } def : WriteRes<WriteMicrocoded, [SLM_FPC_RSV0]> { let Latency = 100; } def : WriteRes<WriteFence, [SLM_MEC_RSV]>; @@ -306,7 +309,8 @@ defm : SLMWriteResPair<WriteFShuffle256, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFVarShuffle256, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteShuffle256, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteVarShuffle256, [SLM_FPC_RSV0], 1>; -defm : SLMWriteResPair<WriteVarVecShift, [SLM_FPC_RSV0], 1>; +defm : SLMWriteResPair<WriteVarVecShift, [SLM_FPC_RSV0], 1>; +defm : SLMWriteResPair<WriteVarVecShiftY, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFMA, [SLM_FPC_RSV0], 1>; defm : 
SLMWriteResPair<WriteFMAS, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFMAY, [SLM_FPC_RSV0], 1>; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 2f4a3ef9585..8c4c960ec96 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -235,6 +235,10 @@ def : WriteRes<WriteVecMove, [ZnFPU]>; def : WriteRes<WriteVecLoad, [ZnAGU]> { let Latency = 8; } defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteVecShiftX, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteVecShiftY, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteVecShiftImmX, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteVecShiftImmY, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WritePHAdd, [ZnFPU], 1>; @@ -258,7 +262,8 @@ defm : ZnWriteResFpuPair<WritePSADBWY, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WritePHMINPOS, [ZnFPU0], 4>; // Vector Shift Operations -defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>; +defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>; +defm : ZnWriteResFpuPair<WriteVarVecShiftY, [ZnFPU12], 1>; // Vector insert/extract operations. defm : ZnWriteResFpuPair<WriteVecInsert, [ZnFPU], 1>; |

