diff options
| | | |
|---|---|---|
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-04-27 15:50:33 +0000 |
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-04-27 15:50:33 +0000 |
| commit | b2aa89c9092cadaa4b865ec95eb93d43029ded04 (patch) | |
| tree | 58a86b5609073b71f1f60d824cc99e0951ed6ca1 | |
| parent | e3c3c5a7a72ad9afac80fdf0ee34aa1a76ddd5a2 (diff) | |
| download | bcm5719-llvm-b2aa89c9092cadaa4b865ec95eb93d43029ded04.tar.gz bcm5719-llvm-b2aa89c9092cadaa4b865ec95eb93d43029ded04.zip | |

[X86][AVX] Split WriteFLogic into XMM and YMM/ZMM scheduler classes

This removes all the AND/ANDN/OR/XOR PS/PD InstRW overrides.

llvm-svn: 331051


| mode | file | lines changed |
|---|---|---:|
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 50 |
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 17 |
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 15 |
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 15 |
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 13 |
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 15 |
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 29 |
| -rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 5 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleAtom.td | 1 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 39 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 1 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 1 |
| -rwxr-xr-x | llvm/test/CodeGen/X86/avx512-schedule.ll | 16 |

13 files changed, 92 insertions, 125 deletions

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 6046541d439..c39429ea3b2 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4998,13 +4998,14 @@ multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, Predicate prd, X86FoldableSchedWrite sched, + X86FoldableSchedWrite schedY, bit IsCommutable = 0> { let Predicates = [prd] in { defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info, - sched, IsCommutable>, EVEX_V512, PS, + schedY, IsCommutable>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info, - sched, IsCommutable>, EVEX_V512, PD, VEX_W, + schedY, IsCommutable>, EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; } @@ -5014,13 +5015,13 @@ multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator Op sched, IsCommutable>, EVEX_V128, PS, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info, - sched, IsCommutable>, EVEX_V256, PS, + schedY, IsCommutable>, EVEX_V256, PS, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info, sched, IsCommutable>, EVEX_V128, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info, - sched, IsCommutable>, EVEX_V256, PD, VEX_W, + schedY, IsCommutable>, EVEX_V256, PD, VEX_W, EVEX_CD8<64, CD8VF>; } } @@ -5042,26 +5043,37 @@ multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd } defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512, - WriteFAdd, 1>, + WriteFAdd, WriteFAdd, 1>, avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, WriteFAdd>; -defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512, WriteFMul, 1>, +defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512, + WriteFMul, WriteFMul, 1>, avx512_fp_binop_p_round<0x59, "vmul", 
X86fmulRnd, WriteFMul>; -defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, WriteFAdd>, +defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, + WriteFAdd, WriteFAdd>, avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, WriteFAdd>; -defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, WriteFDiv>, +defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, + WriteFDiv, WriteFDiv>, avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, WriteFDiv>; -defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, WriteFCmp, 0>, +defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, + WriteFCmp, WriteFCmp, 0>, avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, WriteFCmp>; -defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, WriteFCmp, 0>, +defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, + WriteFCmp, WriteFCmp, 0>, avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, WriteFCmp>; let isCodeGenOnly = 1 in { - defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, WriteFCmp, 1>; - defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, WriteFCmp, 1>; -} -defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, WriteFLogic, 1>; -defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, WriteFLogic, 0>; -defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, WriteFLogic, 1>; -defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, WriteFLogic, 1>; + defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, + WriteFCmp, WriteFCmp, 1>; + defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, + WriteFCmp, WriteFCmp, 1>; +} +defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, + WriteFLogic, WriteFLogicY, 1>; +defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, + WriteFLogic, WriteFLogicY, 0>; +defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, + WriteFLogic, WriteFLogicY, 1>; +defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, 
HasDQI, + WriteFLogic, WriteFLogicY, 1>; // Patterns catch floating point selects with bitcasted integer logic ops. multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode, @@ -9860,9 +9872,9 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$sr //===----------------------------------------------------------------------===// defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512, - WriteFShuffle>; + WriteFShuffle, WriteFShuffle>; defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512, - WriteFShuffle>; + WriteFShuffle, WriteFShuffle>; defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl, WriteShuffle, HasBWI>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 68cc4c637f9..a3b75dece71 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -87,6 +87,7 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, /// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d, string OpcodeStr, X86MemOperand x86memop, + X86FoldableSchedWrite sched, list<dag> pat_rr, list<dag> pat_rm, bit Is2Addr = 1> { let isCommutable = 1, hasSideEffects = 0 in @@ -95,14 +96,14 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), pat_rr, d>, - Sched<[WriteFLogic]>; + Sched<[sched]>; let hasSideEffects = 0, mayLoad = 1 in def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), pat_rm, d>, - Sched<[WriteFLogic.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } @@ -2334,29 +2335,29 @@ multiclass 
sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, SDNode OpNode> { let Predicates = [HasAVX, NoVLX] in { defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, - !strconcat(OpcodeStr, "ps"), f256mem, + !strconcat(OpcodeStr, "ps"), f256mem, WriteFLogicY, [], [], 0>, PS, VEX_4V, VEX_L, VEX_WIG; defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, - !strconcat(OpcodeStr, "pd"), f256mem, + !strconcat(OpcodeStr, "pd"), f256mem, WriteFLogicY, [], [], 0>, PD, VEX_4V, VEX_L, VEX_WIG; defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, - !strconcat(OpcodeStr, "ps"), f128mem, + !strconcat(OpcodeStr, "ps"), f128mem, WriteFLogic, [], [], 0>, PS, VEX_4V, VEX_WIG; defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, - !strconcat(OpcodeStr, "pd"), f128mem, + !strconcat(OpcodeStr, "pd"), f128mem, WriteFLogic, [], [], 0>, PD, VEX_4V, VEX_WIG; } let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, - !strconcat(OpcodeStr, "ps"), f128mem, + !strconcat(OpcodeStr, "ps"), f128mem, WriteFLogic, [], []>, PS; defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, - !strconcat(OpcodeStr, "pd"), f128mem, + !strconcat(OpcodeStr, "pd"), f128mem, WriteFLogic, [], []>, PD; } } diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 72044bf44e9..d21b9bdf224 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -166,7 +166,8 @@ defm : BWWriteResPair<WriteFMA, [BWPort01], 5, [1], 1, 5>; // Fused Multiply defm : BWWriteResPair<WriteFMAS, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add (Scalar). defm : BWWriteResPair<WriteFMAY, [BWPort01], 5, [1], 1, 6>; // Fused Multiply Add (YMM/ZMM). defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs. -defm : BWWriteResPair<WriteFLogic, [BWPort5], 1>; // Floating point and/or/xor logicals. 
+defm : BWWriteResPair<WriteFLogic, [BWPort5], 1, [1], 1, 5>; // Floating point and/or/xor logicals. +defm : BWWriteResPair<WriteFLogicY, [BWPort5], 1, [1], 1, 6>; // Floating point and/or/xor logicals (YMM/ZMM). defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1>; // Floating point vector shuffles. defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1>; // Floating point vector variable shuffles. defm : BWWriteResPair<WriteFBlend, [BWPort015], 1>; // Floating point vector blends. @@ -1090,13 +1091,7 @@ def BWWriteResGroup75 : SchedWriteRes<[BWPort5,BWPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup75], (instregex "VANDNPDYrm", - "VANDNPSYrm", - "VANDPDYrm", - "VANDPSYrm", - "VORPDYrm", - "VORPSYrm", - "VPACKSSDWYrm", +def: InstRW<[BWWriteResGroup75], (instregex "VPACKSSDWYrm", "VPACKSSWBYrm", "VPACKUSDWYrm", "VPACKUSWBYrm", @@ -1123,9 +1118,7 @@ def: InstRW<[BWWriteResGroup75], (instregex "VANDNPDYrm", "VUNPCKHPDYrm", "VUNPCKHPSYrm", "VUNPCKLPDYrm", - "VUNPCKLPSYrm", - "VXORPDYrm", - "VXORPSYrm")>; + "VUNPCKLPSYrm")>; def BWWriteResGroup76 : SchedWriteRes<[BWPort23,BWPort15]> { let Latency = 7; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index c2ea8b12779..fb4d9b528fb 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -163,7 +163,8 @@ defm : HWWriteResPair<WriteFMA, [HWPort01], 5, [1], 1, 6>; defm : HWWriteResPair<WriteFMAS, [HWPort01], 5, [1], 1, 5>; defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>; defm : HWWriteResPair<WriteFSign, [HWPort0], 1>; -defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>; +defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>; +defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>; defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1>; defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1>; defm : HWWriteResPair<WriteFBlend, [HWPort015], 1, [1], 1, 6>; @@ -910,13 +911,7 
@@ def HWWriteResGroup13_1 : SchedWriteRes<[HWPort5,HWPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup13_1], (instregex "VANDNPDYrm", - "VANDNPSYrm", - "VANDPDYrm", - "VANDPSYrm", - "VORPDYrm", - "VORPSYrm", - "VPACKSSDWYrm", +def: InstRW<[HWWriteResGroup13_1], (instregex "VPACKSSDWYrm", "VPACKSSWBYrm", "VPACKUSDWYrm", "VPACKUSWBYrm", @@ -946,9 +941,7 @@ def: InstRW<[HWWriteResGroup13_1], (instregex "VANDNPDYrm", "VUNPCKHPDYrm", "VUNPCKHPSYrm", "VUNPCKLPDYrm", - "VUNPCKLPSYrm", - "VXORPDYrm", - "VXORPSYrm")>; + "VUNPCKLPSYrm")>; def HWWriteResGroup14 : SchedWriteRes<[HWPort6,HWPort23]> { let Latency = 6; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 55475f7ddab..478d8862b84 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -151,6 +151,7 @@ defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>; defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>; defm : SBWriteResPair<WriteFSign, [SBPort5], 1>; defm : SBWriteResPair<WriteFLogic, [SBPort5], 1, [1], 1, 6>; +defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>; defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1>; defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1>; defm : SBWriteResPair<WriteFBlend, [SBPort05], 1, [1], 1, 6>; @@ -1142,13 +1143,7 @@ def SBWriteResGroup73 : SchedWriteRes<[SBPort5,SBPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SBWriteResGroup73], (instregex "VANDNPDYrm", - "VANDNPSYrm", - "VANDPDYrm", - "VANDPSYrm", - "VORPDYrm", - "VORPSYrm", - "VPERM2F128rm", +def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm", "VPERMILPDYmi", "VPERMILPDYrm", "VPERMILPSYmi", @@ -1158,9 +1153,7 @@ def: InstRW<[SBWriteResGroup73], (instregex "VANDNPDYrm", "VUNPCKHPDYrm", "VUNPCKHPSYrm", "VUNPCKLPDYrm", - "VUNPCKLPSYrm", - "VXORPDYrm", - "VXORPSYrm")>; + "VUNPCKLPSYrm")>; def SBWriteResGroup74 : 
SchedWriteRes<[SBPort23,SBPort05]> { let Latency = 8; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index bd7c37da8de..4b51cc8e608 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -163,7 +163,8 @@ defm : SKLWriteResPair<WriteFMA, [SKLPort01], 4, [1], 1, 6>; // Fused Multipl defm : SKLWriteResPair<WriteFMAS, [SKLPort01], 4, [1], 1, 5>; // Fused Multiply Add (Scalar). defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM). defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs. -defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals. +defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals. +defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM). defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1>; // Floating point vector shuffles. defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1>; // Floating point vector shuffles. defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1, [1], 1, 6>; // Floating point vector blends. 
@@ -1624,16 +1625,10 @@ def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup110], (instregex "VANDNPDYrm", - "VANDNPSYrm", - "VANDPDYrm", - "VANDPSYrm", - "VBLENDPDYrmi", +def: InstRW<[SKLWriteResGroup110], (instregex "VBLENDPDYrmi", "VBLENDPSYrmi", "VMASKMOVPDYrm", "VMASKMOVPSYrm", - "VORPDYrm", - "VORPSYrm", "VPADDBYrm", "VPADDDYrm", "VPADDQYrm", @@ -1648,9 +1643,7 @@ def: InstRW<[SKLWriteResGroup110], (instregex "VANDNPDYrm", "VPSUBDYrm", "VPSUBQYrm", "VPSUBWYrm", - "VPXORYrm", - "VXORPDYrm", - "VXORPSYrm")>; + "VPXORYrm")>; def SKLWriteResGroup112 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { let Latency = 8; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index b74b8f3d1c0..2c25e917d80 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -164,6 +164,7 @@ defm : SKXWriteResPair<WriteFMAS, [SKXPort015], 4, [1], 1, 5>; // Fused Multiply defm : SKXWriteResPair<WriteFMAY, [SKXPort015], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM). defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs. defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals. +defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM). defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1>; // Floating point vector shuffles. defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1>; // Floating point vector variable shuffles. defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends. 
@@ -3165,19 +3166,7 @@ def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23,SKXPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup121], (instregex "VANDNPDYrm", - "VANDNPDZ256rm(b?)", - "VANDNPDZrm(b?)", - "VANDNPSYrm", - "VANDNPSZ256rm(b?)", - "VANDNPSZrm(b?)", - "VANDPDYrm", - "VANDPDZ256rm(b?)", - "VANDPDZrm(b?)", - "VANDPSYrm", - "VANDPSZ256rm(b?)", - "VANDPSZrm(b?)", - "VBLENDMPDZ256rm(b?)", +def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)", "VBLENDMPDZrm(b?)", "VBLENDMPSZ256rm(b?)", "VBLENDMPSZrm(b?)", @@ -3244,12 +3233,6 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VANDNPDYrm", "VMOVUPDZrm(b?)", "VMOVUPSZ256rm(b?)", "VMOVUPSZrm(b?)", - "VORPDYrm", - "VORPDZ256rm(b?)", - "VORPDZrm(b?)", - "VORPSYrm", - "VORPSZ256rm(b?)", - "VORPSZrm(b?)", "VPADDBYrm", "VPADDBZ256rm(b?)", "VPADDBZrm(b?)", @@ -3311,13 +3294,7 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VANDNPDYrm", "VPXORDZrm(b?)", "VPXORQZ256rm(b?)", "VPXORQZrm(b?)", - "VPXORYrm", - "VXORPDYrm", - "VXORPDZ256rm(b?)", - "VXORPDZrm(b?)", - "VXORPSYrm", - "VXORPSZ256rm(b?)", - "VXORPSZrm(b?)")>; + "VPXORYrm")>; def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { let Latency = 8; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 31d14f6a607..e87475a96ca 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -89,8 +89,9 @@ defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root e defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. defm WriteFMAS : X86SchedWritePair; // Fused Multiply Add (Scalar). defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM/ZMM). -defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs. -defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals. +defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs. 
+defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals. +defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM). defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles. defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles. defm WriteFBlend : X86SchedWritePair; // Floating point vector blends. diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 9dd23b6c3d6..23ede686ddd 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -212,6 +212,7 @@ defm : AtomWriteResPair<WriteFDiv, [AtomPort01], [AtomPort01], 34, 34, defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>; defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>; defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>; +defm : AtomWriteResPair<WriteFLogicY, [AtomPort01], [AtomPort0]>; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>; defm : AtomWriteResPair<WriteFVarShuffle, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair<WriteFMA, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 6f8eb9b5c60..28d54b9bf7d 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -129,6 +129,25 @@ multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW, } } +multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW, + list<ProcResourceKind> ExePorts, + int Lat, list<int> Res = [2], int UOps = 2> { + // Register variant is using a single cycle on ExePort. 
+ def : WriteRes<SchedRW, ExePorts> { + let Latency = Lat; + let ResourceCycles = Res; + let NumMicroOps = UOps; + } + + // Memory variant also uses 2 cycles on JLAGU and adds 5 cycles to the + // latency. + def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> { + let Latency = !add(Lat, 5); + let ResourceCycles = !listconcat([2], Res); + let NumMicroOps = UOps; + } +} + // A folded store needs a cycle on the SAGU for the store data. def : WriteRes<WriteRMW, [JSAGU]>; @@ -309,6 +328,7 @@ defm : JWriteResFpuPair<WriteFDiv, [JFPU1, JFPM], 19, [1, 19]>; defm : JWriteResFpuPair<WriteFSqrt, [JFPU1, JFPM], 21, [1, 21]>; defm : JWriteResFpuPair<WriteFSign, [JFPU1, JFPM], 2>; defm : JWriteResFpuPair<WriteFLogic, [JFPU01, JFPX], 1>; +defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>; defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>; defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>; defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>; @@ -527,25 +547,6 @@ def : InstRW<[JWriteCVTPH2PSYLd], (instrs VCVTPH2PSYrm)>; // AVX instructions. 
//////////////////////////////////////////////////////////////////////////////// -def JWriteFLogicY: SchedWriteRes<[JFPU01, JFPX]> { - let ResourceCycles = [2, 2]; - let NumMicroOps = 2; -} -def : InstRW<[JWriteFLogicY], (instrs VORPDYrr, VORPSYrr, - VXORPDYrr, VXORPSYrr, - VANDPDYrr, VANDPSYrr, - VANDNPDYrr, VANDNPSYrr)>; - -def JWriteFLogicYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { - let Latency = 6; - let ResourceCycles = [2, 2, 2]; - let NumMicroOps = 2; -} -def : InstRW<[JWriteFLogicYLd, ReadAfterLd], (instrs VORPDYrm, VORPSYrm, - VXORPDYrm, VXORPSYrm, - VANDPDYrm, VANDPSYrm, - VANDNPDYrm, VANDNPSYrm)>; - def JWriteVDPPSY: SchedWriteRes<[JFPU1, JFPM, JFPA]> { let Latency = 12; let ResourceCycles = [2, 6, 6]; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 8663d2ff6c6..0504519a480 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -142,6 +142,7 @@ defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteCvtF2F, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>; +defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 98366770c82..83358a7f24f 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -204,6 +204,7 @@ defm : ZnWriteResFpuPair<WriteCvtF2I, [ZnFPU3], 5>; defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>; defm : ZnWriteResFpuPair<WriteFSign, [ZnFPU3], 2>; defm : ZnWriteResFpuPair<WriteFLogic, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>; defm : 
ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>; defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU0], 5>; diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index c39827c51cc..7fbc5177495 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -657,7 +657,7 @@ define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind { define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind { ; GENERIC-LABEL: orq_broadcast: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: orq_broadcast: @@ -671,7 +671,7 @@ define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind { define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) { ; GENERIC-LABEL: andd512fold: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: andd512fold: @@ -687,7 +687,7 @@ entry: define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) { ; GENERIC-LABEL: andqbrst: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: andqbrst: @@ -994,7 +994,7 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j, define <16 x float> @test_fxor(<16 x float> %a) { ; GENERIC-LABEL: test_fxor: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_fxor: @@ -1009,7 +1009,7 @@ define <16 x float> @test_fxor(<16 x float> %a) { define <8 x float> @test_fxor_8f32(<8 x float> 
%a) { ; GENERIC-LABEL: test_fxor_8f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_fxor_8f32: @@ -1023,7 +1023,7 @@ define <8 x float> @test_fxor_8f32(<8 x float> %a) { define <8 x double> @fabs_v8f64(<8 x double> %p) ; GENERIC-LABEL: fabs_v8f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: fabs_v8f64: @@ -1039,7 +1039,7 @@ declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p) define <16 x float> @fabs_v16f32(<16 x float> %p) ; GENERIC-LABEL: fabs_v16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: fabs_v16f32: @@ -4809,7 +4809,7 @@ define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <1 ; GENERIC-LABEL: test_x86_fnmsub_ps_z: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] -; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00] +; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] ; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; |