From 542b20d6566705f0f9aa46a37b4c65f9fde5d34d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 3 May 2018 22:31:19 +0000 Subject: [X86] Add WriteDPPD/WriteDPPS dot product scheduler classes llvm-svn: 331489 --- llvm/lib/Target/X86/X86InstrSSE.td | 10 +++--- llvm/lib/Target/X86/X86SchedBroadwell.td | 38 ++------------------- llvm/lib/Target/X86/X86SchedHaswell.td | 38 ++------------------- llvm/lib/Target/X86/X86SchedSandyBridge.td | 38 ++------------------- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 38 ++------------------- llvm/lib/Target/X86/X86SchedSkylakeServer.td | 40 ++--------------------- llvm/lib/Target/X86/X86Schedule.td | 7 ++++ llvm/lib/Target/X86/X86ScheduleAtom.td | 3 ++ llvm/lib/Target/X86/X86ScheduleBtVer2.td | 49 ++-------------------------- llvm/lib/Target/X86/X86ScheduleSLM.td | 3 ++ llvm/lib/Target/X86/X86ScheduleZnver1.td | 10 +++--- 11 files changed, 42 insertions(+), 232 deletions(-) (limited to 'llvm/lib') diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index d668963bb22..f650dc80399 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -6026,15 +6026,15 @@ let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, VR128, loadv4f32, f128mem, 0, - SchedWriteFAdd.XMM>, VEX_4V, VEX_WIG; + SchedWriteDPPS.XMM>, VEX_4V, VEX_WIG; let ExeDomain = SSEPackedDouble in defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, VR128, loadv2f64, f128mem, 0, - SchedWriteFAdd.XMM>, VEX_4V, VEX_WIG; + SchedWriteDPPD.XMM>, VEX_4V, VEX_WIG; let ExeDomain = SSEPackedSingle in defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256, VR256, loadv8f32, i256mem, 0, - SchedWriteFAdd.YMM>, VEX_4V, VEX_L, VEX_WIG; + SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG; } let Predicates = [HasAVX2] in { @@ -6055,11 +6055,11 @@ let Constraints = "$src1 = $dst" in { let ExeDomain = SSEPackedSingle in defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, VR128, memopv4f32, f128mem, 1, - SchedWriteFAdd.XMM>; + SchedWriteDPPS.XMM>; let ExeDomain = SSEPackedDouble in defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, VR128, memopv2f64, f128mem, 1, - SchedWriteFAdd.XMM>; + SchedWriteDPPD.XMM>; } /// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index af7f2acf0ce..b6ad9dfe9c6 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -173,6 +173,9 @@ defm : BWWriteResPair; // Floating point defm : BWWriteResPair; // Fused Multiply Add. defm : BWWriteResPair; // Fused Multiply Add (Scalar). defm : BWWriteResPair; // Fused Multiply Add (YMM/ZMM). +defm : BWWriteResPair; // Floating point double dot product. +defm : BWWriteResPair; // Floating point single dot product. +defm : BWWriteResPair; // Floating point single dot product (YMM). defm : BWWriteResPair; // Floating point fabs/fchs. defm : BWWriteResPair; // Floating point and/or/xor logicals. defm : BWWriteResPair; // Floating point and/or/xor logicals (YMM/ZMM). @@ -1267,13 +1270,6 @@ def: InstRW<[BWWriteResGroup102], (instregex "VPERM2F128rm", "VPMOVZXDQYrm", "VPMOVZXWQYrm")>; -def BWWriteResGroup104 : SchedWriteRes<[BWPort0,BWPort1,BWPort5]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[BWWriteResGroup104], (instregex "(V?)DPPDrri")>; - def BWWriteResGroup105 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> { let Latency = 9; let NumMicroOps = 3; @@ -1501,20 +1497,6 @@ def BWWriteResGroup141 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> { } def: InstRW<[BWWriteResGroup141], (instregex "MUL_FI(16|32)m")>; -def BWWriteResGroup142 : SchedWriteRes<[BWPort0,BWPort1,BWPort5]> { - let Latency = 14; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[BWWriteResGroup142], (instregex "(V?)DPPS(Y?)rri")>; - -def BWWriteResGroup143 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> { - let Latency = 14; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[BWWriteResGroup143], (instregex "(V?)DPPDrmi")>; - def BWWriteResGroup144 : SchedWriteRes<[BWPort1,BWPort6,BWPort23,BWPort0156]> { let Latency = 14; let NumMicroOps = 8; @@ -1620,13 +1602,6 @@ def BWWriteResGroup161 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> { def: InstRW<[BWWriteResGroup161], (instregex "(V?)DIVPDrm", "(V?)DIVSDrm")>; -def BWWriteResGroup163 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> { - let Latency = 19; - let NumMicroOps = 5; - let ResourceCycles = [2,1,1,1]; -} -def: InstRW<[BWWriteResGroup163], (instregex "(V?)DPPSrmi")>; - def BWWriteResGroup165 : SchedWriteRes<[BWPort0]> { let Latency = 20; let NumMicroOps = 1; @@ -1636,13 +1611,6 @@ def: InstRW<[BWWriteResGroup165], (instregex "DIV_FPrST0", "DIV_FST0r", "DIV_FrST0")>; -def BWWriteResGroup166 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> { - let Latency = 20; - let NumMicroOps = 5; - let ResourceCycles = [2,1,1,1]; -} -def: InstRW<[BWWriteResGroup166], (instregex "VDPPSYrmi")>; - def BWWriteResGroup167 : SchedWriteRes<[BWPort4,BWPort5,BWPort6,BWPort23,BWPort237,BWPort06,BWPort0156]> { let Latency = 20; let NumMicroOps = 8; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 11ce9e9687d..9303caa8a6c 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -169,6 +169,9 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -1829,20 +1832,6 @@ def HWWriteResGroup115 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> { } def: InstRW<[HWWriteResGroup115], (instregex "MUL_FI(16|32)m")>; -def HWWriteResGroup116 : SchedWriteRes<[HWPort0,HWPort1,HWPort5]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup116], (instregex "(V?)DPPDrri")>; - -def HWWriteResGroup117 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { - let Latency = 15; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[HWWriteResGroup117], (instregex "(V?)DPPDrmi")>; - def HWWriteResGroup120 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> { let Latency = 16; let NumMicroOps = 10; @@ -1932,27 +1921,6 @@ def HWWriteResGroup138 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { } def: InstRW<[HWWriteResGroup138], (instregex "(V?)SQRTPSm")>; -def HWWriteResGroup140 : SchedWriteRes<[HWPort0,HWPort1,HWPort5]> { - let Latency = 14; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[HWWriteResGroup140], (instregex "(V?)DPPS(Y?)rri")>; - -def HWWriteResGroup141 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { - let Latency = 20; - let NumMicroOps = 5; - let ResourceCycles = [2,1,1,1]; -} -def: InstRW<[HWWriteResGroup141], (instregex "(V?)DPPSrmi")>; - -def HWWriteResGroup141_1 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { - let Latency = 21; - let NumMicroOps = 5; - let ResourceCycles = [2,1,1,1]; -} -def: InstRW<[HWWriteResGroup141_1], (instregex "VDPPSYrmi")>; - def HWWriteResGroup142 : SchedWriteRes<[HWPort1,HWPort06,HWPort15,HWPort0156]> { let Latency = 14; let NumMicroOps = 10; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 876afdb5a6b..4c5a9636695 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -153,6 +153,9 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -1168,13 +1171,6 @@ def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort05]> { def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDYrm", "VMASKMOVPSYrm")>; -def SBWriteResGroup92 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup92], (instregex "(V?)DPPDrri")>; - def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { let Latency = 9; let NumMicroOps = 3; @@ -1358,13 +1354,6 @@ def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> { } def: InstRW<[SBWriteResGroup111], (instregex "MUL_F(32|64)m")>; -def SBWriteResGroup112 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { - let Latency = 12; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SBWriteResGroup112], (instregex "(V?)DPPS(Y?)rri")>; - def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> { let Latency = 13; let NumMicroOps = 3; @@ -1397,27 +1386,6 @@ def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { } def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI(16|32)m")>; -def SBWriteResGroup120 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> { - let Latency = 15; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SBWriteResGroup120], (instregex "(V?)DPPDrmi")>; - -def SBWriteResGroup121 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> { - let Latency = 18; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; -} -def: InstRW<[SBWriteResGroup121], (instregex "(V?)DPPSrmi")>; - -def SBWriteResGroup122 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> { - let Latency = 19; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; -} -def: InstRW<[SBWriteResGroup122], (instregex "VDPPSYrmi")>; - def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23,SBFPDivider]> { let Latency = 20; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 0ef7938e2ae..48f925274e9 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -169,6 +169,9 @@ defm : SKLWriteResPair; // Floating point defm : SKLWriteResPair; // Fused Multiply Add. defm : SKLWriteResPair; // Fused Multiply Add (Scalar). defm : SKLWriteResPair; // Fused Multiply Add (YMM/ZMM). +defm : SKLWriteResPair; // Floating point double dot product. +defm : SKLWriteResPair; // Floating point single dot product. +defm : SKLWriteResPair; // Floating point single dot product (YMM). defm : SKLWriteResPair; // Floating point fabs/fchs. defm : SKLWriteResPair; // Floating point and/or/xor logicals. defm : SKLWriteResPair; // Floating point and/or/xor logicals (YMM/ZMM). @@ -1521,13 +1524,6 @@ def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVTPS2PIirm", "VCVTPH2PSrm", "(V?)CVTPS2PDrm")>; -def SKLWriteResGroup124 : SchedWriteRes<[SKLPort5,SKLPort01]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SKLWriteResGroup124], (instregex "(V?)DPPDrri")>; - def SKLWriteResGroup126 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { let Latency = 9; let NumMicroOps = 3; @@ -1785,13 +1781,6 @@ def SKLWriteResGroup163 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { } def: InstRW<[SKLWriteResGroup163], (instregex "VCVTDQ2PDYrm")>; -def SKLWriteResGroup164 : SchedWriteRes<[SKLPort5,SKLPort01]> { - let Latency = 13; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; -} -def: InstRW<[SKLWriteResGroup164], (instregex "(V?)DPPS(Y?)rri")>; - def SKLWriteResGroup166 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { let Latency = 14; let NumMicroOps = 1; @@ -1848,13 +1837,6 @@ def SKLWriteResGroup172 : SchedWriteRes<[SKLPort23,SKLPort01]> { def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDPDYm", "VROUNDPSYm")>; -def SKLWriteResGroup173 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> { - let Latency = 15; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SKLWriteResGroup173], (instregex "(V?)DPPDrmi")>; - def SKLWriteResGroup174 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> { let Latency = 15; let NumMicroOps = 10; @@ -1961,13 +1943,6 @@ def SKLWriteResGroup186_1 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { } def: InstRW<[SKLWriteResGroup186_1], (instregex "VSQRTPSYm")>; -def SKLWriteResGroup187 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> { - let Latency = 19; - let NumMicroOps = 5; - let ResourceCycles = [1,1,3]; -} -def: InstRW<[SKLWriteResGroup187], (instregex "(V?)DPPSrmi")>; - def SKLWriteResGroup189 : SchedWriteRes<[SKLPort0]> { let Latency = 20; let NumMicroOps = 1; @@ -1984,13 +1959,6 @@ def SKLWriteResGroup190 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { } def: InstRW<[SKLWriteResGroup190], (instregex "(V?)DIVPDrm")>; -def SKLWriteResGroup191 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> { - let Latency = 20; - let NumMicroOps = 5; - let ResourceCycles = [1,1,3]; -} -def: InstRW<[SKLWriteResGroup191], (instregex "VDPPSYrmi")>; - def SKLWriteResGroup192 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { let Latency = 20; let NumMicroOps = 8; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 6c978144ee2..e6224ddb503 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -169,6 +169,9 @@ defm : SKXWriteResPair; // Floating poin defm : SKXWriteResPair; // Fused Multiply Add. defm : SKXWriteResPair; // Fused Multiply Add (Scalar). defm : SKXWriteResPair; // Fused Multiply Add (YMM/ZMM). +defm : SKXWriteResPair; // Floating point double dot product. +defm : SKXWriteResPair; // Floating point single dot product. +defm : SKXWriteResPair; // Floating point single dot product (YMM). defm : SKXWriteResPair; // Floating point fabs/fchs. defm : SKXWriteResPair; // Floating point and/or/xor logicals. defm : SKXWriteResPair; // Floating point and/or/xor logicals (YMM/ZMM). @@ -2516,13 +2519,6 @@ def: InstRW<[SKXWriteResGroup138], (instregex "VRCP14PDZr(b?)", "VRSQRT14PDZr(b?)", "VRSQRT14PSZr(b?)")>; -def SKXWriteResGroup139 : SchedWriteRes<[SKXPort5,SKXPort015]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SKXWriteResGroup139], (instregex "(V?)DPPDrri")>; - def SKXWriteResGroup141 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { let Latency = 9; let NumMicroOps = 3; @@ -2988,15 +2984,6 @@ def SKXWriteResGroup181 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { } def: InstRW<[SKXWriteResGroup181], (instregex "VCVTDQ2PDYrm")>; -def SKXWriteResGroup182 : SchedWriteRes<[SKXPort5,SKXPort015]> { - let Latency = 13; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; -} -def: InstRW<[SKXWriteResGroup182], (instregex "DPPSrri", - "VDPPSYrri", - "VDPPSrri")>; - def SKXWriteResGroup183 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { let Latency = 13; let NumMicroOps = 4; @@ -3092,13 +3079,6 @@ def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZ256rm(b?)i", "VROUNDPDYm", "VROUNDPSYm")>; -def SKXWriteResGroup193 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { - let Latency = 15; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SKXWriteResGroup193], (instregex "(V?)DPPDrmi")>; - def SKXWriteResGroup194 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort01,SKXPort23,SKXPort015]> { let Latency = 15; let NumMicroOps = 8; @@ -3244,13 +3224,6 @@ def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort015]> { def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)", "VPMULLQZrm(b?)")>; -def SKXWriteResGroup212 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { - let Latency = 19; - let NumMicroOps = 5; - let ResourceCycles = [1,1,3]; -} -def: InstRW<[SKXWriteResGroup212], (instregex "(V?)DPPSrmi")>; - def SKXWriteResGroup214 : SchedWriteRes<[]> { let Latency = 20; let NumMicroOps = 0; @@ -3275,13 +3248,6 @@ def SKXWriteResGroup216 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { } def: InstRW<[SKXWriteResGroup216], (instregex "(V?)DIVPD(Z128)?rm")>; -def SKXWriteResGroup217 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { - let Latency = 20; - let NumMicroOps = 5; - let ResourceCycles = [1,1,3]; -} -def: InstRW<[SKXWriteResGroup217], (instregex "VDPPSYrmi")>; - def SKXWriteResGroup218 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { let Latency = 20; let NumMicroOps = 5; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 7f6a38f6a4f..bc76bb48848 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -119,6 +119,9 @@ defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root e defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. defm WriteFMAS : X86SchedWritePair; // Fused Multiply Add (Scalar). defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM/ZMM). +defm WriteDPPD : X86SchedWritePair; // Floating point double dot product. +defm WriteDPPS : X86SchedWritePair; // Floating point single dot product. +defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM). defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs. defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals. defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM). @@ -243,6 +246,10 @@ def SchedWriteFMul : X86SchedWriteWidths; def SchedWriteFMA : X86SchedWriteWidths; +def SchedWriteDPPD + : X86SchedWriteWidths; +def SchedWriteDPPS + : X86SchedWriteWidths; def SchedWriteFDiv : X86SchedWriteWidths; def SchedWriteFSqrt diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index da19ad7d508..c13f4473198 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -227,6 +227,9 @@ defm : AtomWriteResPair; // NOTE defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index d930ed00d30..b1b7f154bc3 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -325,6 +325,9 @@ defm : JWriteResYMMPair; defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair; // NOTE: Doesn't exist on Jaguar. +defm : JWriteResFpuPair; +defm : JWriteResFpuPair; +defm : JWriteResYMMPair; defm : JWriteResFpuPair; defm : JWriteResYMMPair; defm : JWriteResFpuPair; @@ -482,38 +485,6 @@ defm : JWriteResFpuPair; defm : JWriteResFpuPair; -//////////////////////////////////////////////////////////////////////////////// -// SSE4.1 instructions. -//////////////////////////////////////////////////////////////////////////////// - -def JWriteDPPS: SchedWriteRes<[JFPU1, JFPM, JFPA]> { - let Latency = 11; - let ResourceCycles = [1, 3, 3]; - let NumMicroOps = 5; -} -def : InstRW<[JWriteDPPS], (instrs DPPSrri, VDPPSrri)>; - -def JWriteDPPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM, JFPA]> { - let Latency = 16; - let ResourceCycles = [1, 1, 3, 3]; - let NumMicroOps = 5; -} -def : InstRW<[JWriteDPPSLd], (instrs DPPSrmi, VDPPSrmi)>; - -def JWriteDPPD: SchedWriteRes<[JFPU1, JFPM, JFPA]> { - let Latency = 9; - let ResourceCycles = [1, 3, 3]; - let NumMicroOps = 3; -} -def : InstRW<[JWriteDPPD], (instrs DPPDrri, VDPPDrri)>; - -def JWriteDPPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM, JFPA]> { - let Latency = 14; - let ResourceCycles = [1, 1, 3, 3]; - let NumMicroOps = 3; -} -def : InstRW<[JWriteDPPDLd], (instrs DPPDrmi, VDPPDrmi)>; - //////////////////////////////////////////////////////////////////////////////// // SSE4A instructions. //////////////////////////////////////////////////////////////////////////////// @@ -560,20 +531,6 @@ def : InstRW<[JWriteCVTPH2PSYLd], (instrs VCVTPH2PSYrm)>; // AVX instructions. //////////////////////////////////////////////////////////////////////////////// -def JWriteVDPPSY: SchedWriteRes<[JFPU1, JFPM, JFPA]> { - let Latency = 12; - let ResourceCycles = [2, 6, 6]; - let NumMicroOps = 10; -} -def : InstRW<[JWriteVDPPSY], (instrs VDPPSYrri)>; - -def JWriteVDPPSYLd: SchedWriteRes<[JLAGU, JFPU1, JFPM, JFPA]> { - let Latency = 17; - let ResourceCycles = [2, 2, 6, 6]; - let NumMicroOps = 10; -} -def : InstRW<[JWriteVDPPSYLd, ReadAfterLd], (instrs VDPPSYrmi)>; - def JWriteVMULYPD: SchedWriteRes<[JFPU1, JFPM]> { let Latency = 4; let ResourceCycles = [2, 4]; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 51ced28e901..46f5f823fa5 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -144,6 +144,9 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index a6ba4cbb015..c15b540764a 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -1540,17 +1540,19 @@ def : InstRW<[ZnWriteROUNDm], (instregex "(V?)ROUND(S|P)(S|D)(Y?)m")>; // DPPS. // x,x,i / v,v,v,i. -def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPS(Y?)rri")>; +def : SchedAlias; +def : SchedAlias; // x,m,i / v,v,m,i. -def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPS(Y?)rmi")>; +def : SchedAlias; +def : SchedAlias; // DPPD. // x,x,i. -def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPDrri")>; +def : SchedAlias; // x,m,i. -def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPDrmi")>; +def : SchedAlias; // VSQRTPS. // y,y. -- cgit v1.2.3