summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2018-04-27 15:50:33 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2018-04-27 15:50:33 +0000
commitb2aa89c9092cadaa4b865ec95eb93d43029ded04 (patch)
tree58a86b5609073b71f1f60d824cc99e0951ed6ca1
parente3c3c5a7a72ad9afac80fdf0ee34aa1a76ddd5a2 (diff)
downloadbcm5719-llvm-b2aa89c9092cadaa4b865ec95eb93d43029ded04.tar.gz
bcm5719-llvm-b2aa89c9092cadaa4b865ec95eb93d43029ded04.zip
[X86][AVX] Split WriteFLogic into XMM and YMM/ZMM scheduler classes
This removes all the AND/ANDN/OR/XOR PS/PD InstRW overrides. llvm-svn: 331051
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td50
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td17
-rwxr-xr-xllvm/lib/Target/X86/X86SchedBroadwell.td15
-rw-r--r--llvm/lib/Target/X86/X86SchedHaswell.td15
-rw-r--r--llvm/lib/Target/X86/X86SchedSandyBridge.td13
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeClient.td15
-rwxr-xr-xllvm/lib/Target/X86/X86SchedSkylakeServer.td29
-rw-r--r--llvm/lib/Target/X86/X86Schedule.td5
-rw-r--r--llvm/lib/Target/X86/X86ScheduleAtom.td1
-rw-r--r--llvm/lib/Target/X86/X86ScheduleBtVer2.td39
-rw-r--r--llvm/lib/Target/X86/X86ScheduleSLM.td1
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver1.td1
-rwxr-xr-xllvm/test/CodeGen/X86/avx512-schedule.ll16
13 files changed, 92 insertions, 125 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 6046541d439..c39429ea3b2 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4998,13 +4998,14 @@ multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
Predicate prd, X86FoldableSchedWrite sched,
+ X86FoldableSchedWrite schedY,
bit IsCommutable = 0> {
let Predicates = [prd] in {
defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
- sched, IsCommutable>, EVEX_V512, PS,
+ schedY, IsCommutable>, EVEX_V512, PS,
EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
- sched, IsCommutable>, EVEX_V512, PD, VEX_W,
+ schedY, IsCommutable>, EVEX_V512, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
@@ -5014,13 +5015,13 @@ multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator Op
sched, IsCommutable>, EVEX_V128, PS,
EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
- sched, IsCommutable>, EVEX_V256, PS,
+ schedY, IsCommutable>, EVEX_V256, PS,
EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
sched, IsCommutable>, EVEX_V128, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
- sched, IsCommutable>, EVEX_V256, PD, VEX_W,
+ schedY, IsCommutable>, EVEX_V256, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
}
@@ -5042,26 +5043,37 @@ multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd
}
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
- WriteFAdd, 1>,
+ WriteFAdd, WriteFAdd, 1>,
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, WriteFAdd>;
-defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512, WriteFMul, 1>,
+defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
+ WriteFMul, WriteFMul, 1>,
avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, WriteFMul>;
-defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, WriteFAdd>,
+defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
+ WriteFAdd, WriteFAdd>,
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, WriteFAdd>;
-defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, WriteFDiv>,
+defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
+ WriteFDiv, WriteFDiv>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, WriteFDiv>;
-defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, WriteFCmp, 0>,
+defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
+ WriteFCmp, WriteFCmp, 0>,
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, WriteFCmp>;
-defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, WriteFCmp, 0>,
+defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
+ WriteFCmp, WriteFCmp, 0>,
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, WriteFCmp>;
let isCodeGenOnly = 1 in {
- defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, WriteFCmp, 1>;
- defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, WriteFCmp, 1>;
-}
-defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, WriteFLogic, 1>;
-defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, WriteFLogic, 0>;
-defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, WriteFLogic, 1>;
-defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, WriteFLogic, 1>;
+ defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
+ WriteFCmp, WriteFCmp, 1>;
+ defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
+ WriteFCmp, WriteFCmp, 1>;
+}
+defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
+ WriteFLogic, WriteFLogicY, 1>;
+defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
+ WriteFLogic, WriteFLogicY, 0>;
+defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
+ WriteFLogic, WriteFLogicY, 1>;
+defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
+ WriteFLogic, WriteFLogicY, 1>;
// Patterns catch floating point selects with bitcasted integer logic ops.
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
@@ -9860,9 +9872,9 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$sr
//===----------------------------------------------------------------------===//
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
- WriteFShuffle>;
+ WriteFShuffle, WriteFShuffle>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
- WriteFShuffle>;
+ WriteFShuffle, WriteFShuffle>;
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
WriteShuffle, HasBWI>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 68cc4c637f9..a3b75dece71 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -87,6 +87,7 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
string OpcodeStr, X86MemOperand x86memop,
+ X86FoldableSchedWrite sched,
list<dag> pat_rr, list<dag> pat_rm,
bit Is2Addr = 1> {
let isCommutable = 1, hasSideEffects = 0 in
@@ -95,14 +96,14 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
pat_rr, d>,
- Sched<[WriteFLogic]>;
+ Sched<[sched]>;
let hasSideEffects = 0, mayLoad = 1 in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
pat_rm, d>,
- Sched<[WriteFLogic.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, ReadAfterLd]>;
}
@@ -2334,29 +2335,29 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f256mem,
+ !strconcat(OpcodeStr, "ps"), f256mem, WriteFLogicY,
[], [], 0>, PS, VEX_4V, VEX_L, VEX_WIG;
defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
- !strconcat(OpcodeStr, "pd"), f256mem,
+ !strconcat(OpcodeStr, "pd"), f256mem, WriteFLogicY,
[], [], 0>, PD, VEX_4V, VEX_L, VEX_WIG;
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f128mem,
+ !strconcat(OpcodeStr, "ps"), f128mem, WriteFLogic,
[], [], 0>, PS, VEX_4V, VEX_WIG;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
- !strconcat(OpcodeStr, "pd"), f128mem,
+ !strconcat(OpcodeStr, "pd"), f128mem, WriteFLogic,
[], [], 0>, PD, VEX_4V, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f128mem,
+ !strconcat(OpcodeStr, "ps"), f128mem, WriteFLogic,
[], []>, PS;
defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
- !strconcat(OpcodeStr, "pd"), f128mem,
+ !strconcat(OpcodeStr, "pd"), f128mem, WriteFLogic,
[], []>, PD;
}
}
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 72044bf44e9..d21b9bdf224 100755
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -166,7 +166,8 @@ defm : BWWriteResPair<WriteFMA, [BWPort01], 5, [1], 1, 5>; // Fused Multiply
defm : BWWriteResPair<WriteFMAS, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add (Scalar).
defm : BWWriteResPair<WriteFMAY, [BWPort01], 5, [1], 1, 6>; // Fused Multiply Add (YMM/ZMM).
defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs.
-defm : BWWriteResPair<WriteFLogic, [BWPort5], 1>; // Floating point and/or/xor logicals.
+defm : BWWriteResPair<WriteFLogic, [BWPort5], 1, [1], 1, 5>; // Floating point and/or/xor logicals.
+defm : BWWriteResPair<WriteFLogicY, [BWPort5], 1, [1], 1, 6>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1>; // Floating point vector shuffles.
defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1>; // Floating point vector variable shuffles.
defm : BWWriteResPair<WriteFBlend, [BWPort015], 1>; // Floating point vector blends.
@@ -1090,13 +1091,7 @@ def BWWriteResGroup75 : SchedWriteRes<[BWPort5,BWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup75], (instregex "VANDNPDYrm",
- "VANDNPSYrm",
- "VANDPDYrm",
- "VANDPSYrm",
- "VORPDYrm",
- "VORPSYrm",
- "VPACKSSDWYrm",
+def: InstRW<[BWWriteResGroup75], (instregex "VPACKSSDWYrm",
"VPACKSSWBYrm",
"VPACKUSDWYrm",
"VPACKUSWBYrm",
@@ -1123,9 +1118,7 @@ def: InstRW<[BWWriteResGroup75], (instregex "VANDNPDYrm",
"VUNPCKHPDYrm",
"VUNPCKHPSYrm",
"VUNPCKLPDYrm",
- "VUNPCKLPSYrm",
- "VXORPDYrm",
- "VXORPSYrm")>;
+ "VUNPCKLPSYrm")>;
def BWWriteResGroup76 : SchedWriteRes<[BWPort23,BWPort15]> {
let Latency = 7;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index c2ea8b12779..fb4d9b528fb 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -163,7 +163,8 @@ defm : HWWriteResPair<WriteFMA, [HWPort01], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteFMAS, [HWPort01], 5, [1], 1, 5>;
defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>;
defm : HWWriteResPair<WriteFSign, [HWPort0], 1>;
-defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>;
+defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>;
+defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1>;
defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1>;
defm : HWWriteResPair<WriteFBlend, [HWPort015], 1, [1], 1, 6>;
@@ -910,13 +911,7 @@ def HWWriteResGroup13_1 : SchedWriteRes<[HWPort5,HWPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup13_1], (instregex "VANDNPDYrm",
- "VANDNPSYrm",
- "VANDPDYrm",
- "VANDPSYrm",
- "VORPDYrm",
- "VORPSYrm",
- "VPACKSSDWYrm",
+def: InstRW<[HWWriteResGroup13_1], (instregex "VPACKSSDWYrm",
"VPACKSSWBYrm",
"VPACKUSDWYrm",
"VPACKUSWBYrm",
@@ -946,9 +941,7 @@ def: InstRW<[HWWriteResGroup13_1], (instregex "VANDNPDYrm",
"VUNPCKHPDYrm",
"VUNPCKHPSYrm",
"VUNPCKLPDYrm",
- "VUNPCKLPSYrm",
- "VXORPDYrm",
- "VXORPSYrm")>;
+ "VUNPCKLPSYrm")>;
def HWWriteResGroup14 : SchedWriteRes<[HWPort6,HWPort23]> {
let Latency = 6;
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 55475f7ddab..478d8862b84 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -151,6 +151,7 @@ defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>;
defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>;
defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;
defm : SBWriteResPair<WriteFLogic, [SBPort5], 1, [1], 1, 6>;
+defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1>;
defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1>;
defm : SBWriteResPair<WriteFBlend, [SBPort05], 1, [1], 1, 6>;
@@ -1142,13 +1143,7 @@ def SBWriteResGroup73 : SchedWriteRes<[SBPort5,SBPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup73], (instregex "VANDNPDYrm",
- "VANDNPSYrm",
- "VANDPDYrm",
- "VANDPSYrm",
- "VORPDYrm",
- "VORPSYrm",
- "VPERM2F128rm",
+def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm",
"VPERMILPDYmi",
"VPERMILPDYrm",
"VPERMILPSYmi",
@@ -1158,9 +1153,7 @@ def: InstRW<[SBWriteResGroup73], (instregex "VANDNPDYrm",
"VUNPCKHPDYrm",
"VUNPCKHPSYrm",
"VUNPCKLPDYrm",
- "VUNPCKLPSYrm",
- "VXORPDYrm",
- "VXORPSYrm")>;
+ "VUNPCKLPSYrm")>;
def SBWriteResGroup74 : SchedWriteRes<[SBPort23,SBPort05]> {
let Latency = 8;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index bd7c37da8de..4b51cc8e608 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -163,7 +163,8 @@ defm : SKLWriteResPair<WriteFMA, [SKLPort01], 4, [1], 1, 6>; // Fused Multipl
defm : SKLWriteResPair<WriteFMAS, [SKLPort01], 4, [1], 1, 5>; // Fused Multiply Add (Scalar).
defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM).
defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs.
-defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
+defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
+defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1>; // Floating point vector shuffles.
defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1, [1], 1, 6>; // Floating point vector blends.
@@ -1624,16 +1625,10 @@ def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup110], (instregex "VANDNPDYrm",
- "VANDNPSYrm",
- "VANDPDYrm",
- "VANDPSYrm",
- "VBLENDPDYrmi",
+def: InstRW<[SKLWriteResGroup110], (instregex "VBLENDPDYrmi",
"VBLENDPSYrmi",
"VMASKMOVPDYrm",
"VMASKMOVPSYrm",
- "VORPDYrm",
- "VORPSYrm",
"VPADDBYrm",
"VPADDDYrm",
"VPADDQYrm",
@@ -1648,9 +1643,7 @@ def: InstRW<[SKLWriteResGroup110], (instregex "VANDNPDYrm",
"VPSUBDYrm",
"VPSUBQYrm",
"VPSUBWYrm",
- "VPXORYrm",
- "VXORPDYrm",
- "VXORPSYrm")>;
+ "VPXORYrm")>;
def SKLWriteResGroup112 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
let Latency = 8;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index b74b8f3d1c0..2c25e917d80 100755
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -164,6 +164,7 @@ defm : SKXWriteResPair<WriteFMAS, [SKXPort015], 4, [1], 1, 5>; // Fused Multiply
defm : SKXWriteResPair<WriteFMAY, [SKXPort015], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM).
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
+defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM).
defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1>; // Floating point vector shuffles.
defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1>; // Floating point vector variable shuffles.
defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends.
@@ -3165,19 +3166,7 @@ def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup121], (instregex "VANDNPDYrm",
- "VANDNPDZ256rm(b?)",
- "VANDNPDZrm(b?)",
- "VANDNPSYrm",
- "VANDNPSZ256rm(b?)",
- "VANDNPSZrm(b?)",
- "VANDPDYrm",
- "VANDPDZ256rm(b?)",
- "VANDPDZrm(b?)",
- "VANDPSYrm",
- "VANDPSZ256rm(b?)",
- "VANDPSZrm(b?)",
- "VBLENDMPDZ256rm(b?)",
+def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)",
"VBLENDMPDZrm(b?)",
"VBLENDMPSZ256rm(b?)",
"VBLENDMPSZrm(b?)",
@@ -3244,12 +3233,6 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VANDNPDYrm",
"VMOVUPDZrm(b?)",
"VMOVUPSZ256rm(b?)",
"VMOVUPSZrm(b?)",
- "VORPDYrm",
- "VORPDZ256rm(b?)",
- "VORPDZrm(b?)",
- "VORPSYrm",
- "VORPSZ256rm(b?)",
- "VORPSZrm(b?)",
"VPADDBYrm",
"VPADDBZ256rm(b?)",
"VPADDBZrm(b?)",
@@ -3311,13 +3294,7 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VANDNPDYrm",
"VPXORDZrm(b?)",
"VPXORQZ256rm(b?)",
"VPXORQZrm(b?)",
- "VPXORYrm",
- "VXORPDYrm",
- "VXORPDZ256rm(b?)",
- "VXORPDZrm(b?)",
- "VXORPSYrm",
- "VXORPSZ256rm(b?)",
- "VXORPSZrm(b?)")>;
+ "VPXORYrm")>;
def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
let Latency = 8;
diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td
index 31d14f6a607..e87475a96ca 100644
--- a/llvm/lib/Target/X86/X86Schedule.td
+++ b/llvm/lib/Target/X86/X86Schedule.td
@@ -89,8 +89,9 @@ defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root e
defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
defm WriteFMAS : X86SchedWritePair; // Fused Multiply Add (Scalar).
defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM/ZMM).
-defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
-defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
+defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
+defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
+defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM).
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index 9dd23b6c3d6..23ede686ddd 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -212,6 +212,7 @@ defm : AtomWriteResPair<WriteFDiv, [AtomPort01], [AtomPort01], 34, 34,
defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>;
defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>;
+defm : AtomWriteResPair<WriteFLogicY, [AtomPort01], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>;
defm : AtomWriteResPair<WriteFVarShuffle, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteFMA, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 6f8eb9b5c60..28d54b9bf7d 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -129,6 +129,25 @@ multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW,
}
}
+multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts,
+ int Lat, list<int> Res = [2], int UOps = 2> {
+ // Register variant is using a single cycle on ExePort.
+ def : WriteRes<SchedRW, ExePorts> {
+ let Latency = Lat;
+ let ResourceCycles = Res;
+ let NumMicroOps = UOps;
+ }
+
+ // Memory variant also uses 2 cycles on JLAGU and adds 5 cycles to the
+ // latency.
+ def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
+ let Latency = !add(Lat, 5);
+ let ResourceCycles = !listconcat([2], Res);
+ let NumMicroOps = UOps;
+ }
+}
+
// A folded store needs a cycle on the SAGU for the store data.
def : WriteRes<WriteRMW, [JSAGU]>;
@@ -309,6 +328,7 @@ defm : JWriteResFpuPair<WriteFDiv, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResFpuPair<WriteFSqrt, [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResFpuPair<WriteFSign, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFLogic, [JFPU01, JFPX], 1>;
+defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>;
defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>;
@@ -527,25 +547,6 @@ def : InstRW<[JWriteCVTPH2PSYLd], (instrs VCVTPH2PSYrm)>;
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////
-def JWriteFLogicY: SchedWriteRes<[JFPU01, JFPX]> {
- let ResourceCycles = [2, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteFLogicY], (instrs VORPDYrr, VORPSYrr,
- VXORPDYrr, VXORPSYrr,
- VANDPDYrr, VANDPSYrr,
- VANDNPDYrr, VANDNPSYrr)>;
-
-def JWriteFLogicYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
- let Latency = 6;
- let ResourceCycles = [2, 2, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[JWriteFLogicYLd, ReadAfterLd], (instrs VORPDYrm, VORPSYrm,
- VXORPDYrm, VXORPSYrm,
- VANDPDYrm, VANDPSYrm,
- VANDNPDYrm, VANDNPSYrm)>;
-
def JWriteVDPPSY: SchedWriteRes<[JFPU1, JFPM, JFPA]> {
let Latency = 12;
let ResourceCycles = [2, 6, 6];
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index 8663d2ff6c6..0504519a480 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -142,6 +142,7 @@ defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtF2F, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>;
+defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 98366770c82..83358a7f24f 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -204,6 +204,7 @@ defm : ZnWriteResFpuPair<WriteCvtF2I, [ZnFPU3], 5>;
defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>;
defm : ZnWriteResFpuPair<WriteFSign, [ZnFPU3], 2>;
defm : ZnWriteResFpuPair<WriteFLogic, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU0], 5>;
diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll
index c39827c51cc..7fbc5177495 100755
--- a/llvm/test/CodeGen/X86/avx512-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx512-schedule.ll
@@ -657,7 +657,7 @@ define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; GENERIC-LABEL: orq_broadcast:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: orq_broadcast:
@@ -671,7 +671,7 @@ define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; GENERIC-LABEL: andd512fold:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: andd512fold:
@@ -687,7 +687,7 @@ entry:
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; GENERIC-LABEL: andqbrst:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: andqbrst:
@@ -994,7 +994,7 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
define <16 x float> @test_fxor(<16 x float> %a) {
; GENERIC-LABEL: test_fxor:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_fxor:
@@ -1009,7 +1009,7 @@ define <16 x float> @test_fxor(<16 x float> %a) {
define <8 x float> @test_fxor_8f32(<8 x float> %a) {
; GENERIC-LABEL: test_fxor_8f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [7:1.00]
+; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_fxor_8f32:
@@ -1023,7 +1023,7 @@ define <8 x float> @test_fxor_8f32(<8 x float> %a) {
define <8 x double> @fabs_v8f64(<8 x double> %p)
; GENERIC-LABEL: fabs_v8f64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: fabs_v8f64:
@@ -1039,7 +1039,7 @@ declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
define <16 x float> @fabs_v16f32(<16 x float> %p)
; GENERIC-LABEL: fabs_v16f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: fabs_v16f32:
@@ -4809,7 +4809,7 @@ define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <1
; GENERIC-LABEL: test_x86_fnmsub_ps_z:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
+; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
OpenPOWER on IntegriCloud