diff options
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 10 | ||||
-rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 38 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 38 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 38 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 38 | ||||
-rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 40 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 7 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleAtom.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 49 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 10 |
11 files changed, 42 insertions, 232 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index d668963bb22..f650dc80399 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -6026,15 +6026,15 @@ let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, VR128, loadv4f32, f128mem, 0, - SchedWriteFAdd.XMM>, VEX_4V, VEX_WIG; + SchedWriteDPPS.XMM>, VEX_4V, VEX_WIG; let ExeDomain = SSEPackedDouble in defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, VR128, loadv2f64, f128mem, 0, - SchedWriteFAdd.XMM>, VEX_4V, VEX_WIG; + SchedWriteDPPD.XMM>, VEX_4V, VEX_WIG; let ExeDomain = SSEPackedSingle in defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256, VR256, loadv8f32, i256mem, 0, - SchedWriteFAdd.YMM>, VEX_4V, VEX_L, VEX_WIG; + SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG; } let Predicates = [HasAVX2] in { @@ -6055,11 +6055,11 @@ let Constraints = "$src1 = $dst" in { let ExeDomain = SSEPackedSingle in defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, VR128, memopv4f32, f128mem, 1, - SchedWriteFAdd.XMM>; + SchedWriteDPPS.XMM>; let ExeDomain = SSEPackedDouble in defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, VR128, memopv2f64, f128mem, 1, - SchedWriteFAdd.XMM>; + SchedWriteDPPD.XMM>; } /// SS41I_blend_rmi - SSE 4.1 blend with 8-bit immediate diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index af7f2acf0ce..b6ad9dfe9c6 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -173,6 +173,9 @@ defm : BWWriteResPair<WriteFRsqrtY,[BWPort0], 5, [1], 1, 7>; // Floating point defm : BWWriteResPair<WriteFMA, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add. defm : BWWriteResPair<WriteFMAS, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add (Scalar). defm : BWWriteResPair<WriteFMAY, [BWPort01], 5, [1], 1, 6>; // Fused Multiply Add (YMM/ZMM). +defm : BWWriteResPair<WriteDPPD, [BWPort0,BWPort1,BWPort5], 9, [1,1,1], 3, 5>; // Floating point double dot product. +defm : BWWriteResPair<WriteDPPS, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 5>; // Floating point single dot product. +defm : BWWriteResPair<WriteDPPSY, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 6>; // Floating point single dot product (YMM). defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs. defm : BWWriteResPair<WriteFLogic, [BWPort5], 1, [1], 1, 5>; // Floating point and/or/xor logicals. defm : BWWriteResPair<WriteFLogicY, [BWPort5], 1, [1], 1, 6>; // Floating point and/or/xor logicals (YMM/ZMM). @@ -1267,13 +1270,6 @@ def: InstRW<[BWWriteResGroup102], (instregex "VPERM2F128rm", "VPMOVZXDQYrm", "VPMOVZXWQYrm")>; -def BWWriteResGroup104 : SchedWriteRes<[BWPort0,BWPort1,BWPort5]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[BWWriteResGroup104], (instregex "(V?)DPPDrri")>; - def BWWriteResGroup105 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> { let Latency = 9; let NumMicroOps = 3; @@ -1501,20 +1497,6 @@ def BWWriteResGroup141 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> { } def: InstRW<[BWWriteResGroup141], (instregex "MUL_FI(16|32)m")>; -def BWWriteResGroup142 : SchedWriteRes<[BWPort0,BWPort1,BWPort5]> { - let Latency = 14; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[BWWriteResGroup142], (instregex "(V?)DPPS(Y?)rri")>; - -def BWWriteResGroup143 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> { - let Latency = 14; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[BWWriteResGroup143], (instregex "(V?)DPPDrmi")>; - def BWWriteResGroup144 : SchedWriteRes<[BWPort1,BWPort6,BWPort23,BWPort0156]> { let Latency = 14; let NumMicroOps = 8; @@ -1620,13 +1602,6 @@ def BWWriteResGroup161 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> { def: InstRW<[BWWriteResGroup161], (instregex "(V?)DIVPDrm", "(V?)DIVSDrm")>; -def BWWriteResGroup163 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> { - let Latency = 19; - let NumMicroOps = 5; - let ResourceCycles = [2,1,1,1]; -} -def: InstRW<[BWWriteResGroup163], (instregex "(V?)DPPSrmi")>; - def BWWriteResGroup165 : SchedWriteRes<[BWPort0]> { let Latency = 20; let NumMicroOps = 1; @@ -1636,13 +1611,6 @@ def: InstRW<[BWWriteResGroup165], (instregex "DIV_FPrST0", "DIV_FST0r", "DIV_FrST0")>; -def BWWriteResGroup166 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> { - let Latency = 20; - let NumMicroOps = 5; - let ResourceCycles = [2,1,1,1]; -} -def: InstRW<[BWWriteResGroup166], (instregex "VDPPSYrmi")>; - def BWWriteResGroup167 : SchedWriteRes<[BWPort4,BWPort5,BWPort6,BWPort23,BWPort237,BWPort06,BWPort0156]> { let Latency = 20; let NumMicroOps = 8; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 11ce9e9687d..9303caa8a6c 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -169,6 +169,9 @@ defm : HWWriteResPair<WriteCvtF2F, [HWPort1], 3>; defm : HWWriteResPair<WriteFMA, [HWPort01], 5, [1], 1, 6>; defm : HWWriteResPair<WriteFMAS, [HWPort01], 5, [1], 1, 5>; defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>; +defm : HWWriteResPair<WriteDPPD, [HWPort0,HWPort1,HWPort5], 9, [1,1,1], 3, 6>; +defm : HWWriteResPair<WriteDPPS, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 6>; +defm : HWWriteResPair<WriteDPPSY, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>; defm : HWWriteResPair<WriteFSign, [HWPort0], 1>; defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>; defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>; @@ -1829,20 +1832,6 @@ def HWWriteResGroup115 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> { } def: InstRW<[HWWriteResGroup115], (instregex "MUL_FI(16|32)m")>; -def HWWriteResGroup116 : SchedWriteRes<[HWPort0,HWPort1,HWPort5]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup116], (instregex "(V?)DPPDrri")>; - -def HWWriteResGroup117 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { - let Latency = 15; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[HWWriteResGroup117], (instregex "(V?)DPPDrmi")>; - def HWWriteResGroup120 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> { let Latency = 16; let NumMicroOps = 10; @@ -1932,27 +1921,6 @@ def HWWriteResGroup138 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { } def: InstRW<[HWWriteResGroup138], (instregex "(V?)SQRTPSm")>; -def HWWriteResGroup140 : SchedWriteRes<[HWPort0,HWPort1,HWPort5]> { - let Latency = 14; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[HWWriteResGroup140], (instregex "(V?)DPPS(Y?)rri")>; - -def HWWriteResGroup141 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { - let Latency = 20; - let NumMicroOps = 5; - let ResourceCycles = [2,1,1,1]; -} -def: InstRW<[HWWriteResGroup141], (instregex "(V?)DPPSrmi")>; - -def HWWriteResGroup141_1 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { - let Latency = 21; - let NumMicroOps = 5; - let ResourceCycles = [2,1,1,1]; -} -def: InstRW<[HWWriteResGroup141_1], (instregex "VDPPSYrmi")>; - def HWWriteResGroup142 : SchedWriteRes<[HWPort1,HWPort06,HWPort15,HWPort0156]> { let Latency = 14; let NumMicroOps = 10; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 876afdb5a6b..4c5a9636695 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -153,6 +153,9 @@ defm : SBWriteResPair<WriteFRsqrt, [SBPort0], 5, [1], 1, 6>; defm : SBWriteResPair<WriteFRsqrtY,[SBPort0], 5, [1], 1, 7>; defm : SBWriteResPair<WriteFSqrt, [SBPort0], 14, [1], 1, 5>; defm : SBWriteResPair<WriteFSqrtY, [SBPort0], 14, [1], 1, 7>; +defm : SBWriteResPair<WriteDPPD, [SBPort0,SBPort1,SBPort5], 9, [1,1,1], 3, 6>; +defm : SBWriteResPair<WriteDPPS, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 6>; +defm : SBWriteResPair<WriteDPPSY, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 7>; defm : SBWriteResPair<WriteCvtF2I, [SBPort1], 3>; defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>; defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>; @@ -1168,13 +1171,6 @@ def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort05]> { def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDYrm", "VMASKMOVPSYrm")>; -def SBWriteResGroup92 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup92], (instregex "(V?)DPPDrri")>; - def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { let Latency = 9; let NumMicroOps = 3; @@ -1358,13 +1354,6 @@ def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> { } def: InstRW<[SBWriteResGroup111], (instregex "MUL_F(32|64)m")>; -def SBWriteResGroup112 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { - let Latency = 12; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SBWriteResGroup112], (instregex "(V?)DPPS(Y?)rri")>; - def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> { let Latency = 13; let NumMicroOps = 3; @@ -1397,27 +1386,6 @@ def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { } def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI(16|32)m")>; -def SBWriteResGroup120 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> { - let Latency = 15; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SBWriteResGroup120], (instregex "(V?)DPPDrmi")>; - -def SBWriteResGroup121 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> { - let Latency = 18; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; -} -def: InstRW<[SBWriteResGroup121], (instregex "(V?)DPPSrmi")>; - -def SBWriteResGroup122 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> { - let Latency = 19; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; -} -def: InstRW<[SBWriteResGroup122], (instregex "VDPPSYrmi")>; - def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23,SBFPDivider]> { let Latency = 20; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 0ef7938e2ae..48f925274e9 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -169,6 +169,9 @@ defm : SKLWriteResPair<WriteFRsqrtY,[SKLPort0], 4, [1], 1, 5>; // Floating point defm : SKLWriteResPair<WriteFMA, [SKLPort01], 4, [1], 1, 6>; // Fused Multiply Add. defm : SKLWriteResPair<WriteFMAS, [SKLPort01], 4, [1], 1, 5>; // Fused Multiply Add (Scalar). defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM). +defm : SKLWriteResPair<WriteDPPD, [SKLPort5,SKLPort01], 9, [1,2], 3, 6>; // Floating point double dot product. +defm : SKLWriteResPair<WriteDPPS, [SKLPort5,SKLPort01], 13, [1,3], 4, 6>; // Floating point single dot product. +defm : SKLWriteResPair<WriteDPPSY, [SKLPort5,SKLPort01], 13, [1,3], 4, 7>; // Floating point single dot product (YMM). defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs. defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals. defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM). @@ -1521,13 +1524,6 @@ def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVTPS2PIirm", "VCVTPH2PSrm", "(V?)CVTPS2PDrm")>; -def SKLWriteResGroup124 : SchedWriteRes<[SKLPort5,SKLPort01]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SKLWriteResGroup124], (instregex "(V?)DPPDrri")>; - def SKLWriteResGroup126 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { let Latency = 9; let NumMicroOps = 3; @@ -1785,13 +1781,6 @@ def SKLWriteResGroup163 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { } def: InstRW<[SKLWriteResGroup163], (instregex "VCVTDQ2PDYrm")>; -def SKLWriteResGroup164 : SchedWriteRes<[SKLPort5,SKLPort01]> { - let Latency = 13; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; -} -def: InstRW<[SKLWriteResGroup164], (instregex "(V?)DPPS(Y?)rri")>; - def SKLWriteResGroup166 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { let Latency = 14; let NumMicroOps = 1; @@ -1848,13 +1837,6 @@ def SKLWriteResGroup172 : SchedWriteRes<[SKLPort23,SKLPort01]> { def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDPDYm", "VROUNDPSYm")>; -def SKLWriteResGroup173 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> { - let Latency = 15; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SKLWriteResGroup173], (instregex "(V?)DPPDrmi")>; - def SKLWriteResGroup174 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> { let Latency = 15; let NumMicroOps = 10; @@ -1961,13 +1943,6 @@ def SKLWriteResGroup186_1 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { } def: InstRW<[SKLWriteResGroup186_1], (instregex "VSQRTPSYm")>; -def SKLWriteResGroup187 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> { - let Latency = 19; - let NumMicroOps = 5; - let ResourceCycles = [1,1,3]; -} -def: InstRW<[SKLWriteResGroup187], (instregex "(V?)DPPSrmi")>; - def SKLWriteResGroup189 : SchedWriteRes<[SKLPort0]> { let Latency = 20; let NumMicroOps = 1; @@ -1984,13 +1959,6 @@ def SKLWriteResGroup190 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { } def: InstRW<[SKLWriteResGroup190], (instregex "(V?)DIVPDrm")>; -def SKLWriteResGroup191 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> { - let Latency = 20; - let NumMicroOps = 5; - let ResourceCycles = [1,1,3]; -} -def: InstRW<[SKLWriteResGroup191], (instregex "VDPPSYrmi")>; - def SKLWriteResGroup192 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { let Latency = 20; let NumMicroOps = 8; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 6c978144ee2..e6224ddb503 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -169,6 +169,9 @@ defm : SKXWriteResPair<WriteFRsqrtY,[SKXPort0], 4, [1], 1, 7>; // Floating poin defm : SKXWriteResPair<WriteFMA, [SKXPort015], 4, [1], 1, 6>; // Fused Multiply Add. defm : SKXWriteResPair<WriteFMAS, [SKXPort015], 4, [1], 1, 5>; // Fused Multiply Add (Scalar). defm : SKXWriteResPair<WriteFMAY, [SKXPort015], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM). +defm : SKXWriteResPair<WriteDPPD, [SKXPort5,SKXPort015], 9, [1,2], 3, 6>; // Floating point double dot product. +defm : SKXWriteResPair<WriteDPPS, [SKXPort5,SKXPort015], 13, [1,3], 4, 6>; // Floating point single dot product. +defm : SKXWriteResPair<WriteDPPSY,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>; // Floating point single dot product (YMM). defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs. defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals. defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM). @@ -2516,13 +2519,6 @@ def: InstRW<[SKXWriteResGroup138], (instregex "VRCP14PDZr(b?)", "VRSQRT14PDZr(b?)", "VRSQRT14PSZr(b?)")>; -def SKXWriteResGroup139 : SchedWriteRes<[SKXPort5,SKXPort015]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SKXWriteResGroup139], (instregex "(V?)DPPDrri")>; - def SKXWriteResGroup141 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { let Latency = 9; let NumMicroOps = 3; @@ -2988,15 +2984,6 @@ def SKXWriteResGroup181 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { } def: InstRW<[SKXWriteResGroup181], (instregex "VCVTDQ2PDYrm")>; -def SKXWriteResGroup182 : SchedWriteRes<[SKXPort5,SKXPort015]> { - let Latency = 13; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; -} -def: InstRW<[SKXWriteResGroup182], (instregex "DPPSrri", - "VDPPSYrri", - "VDPPSrri")>; - def SKXWriteResGroup183 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { let Latency = 13; let NumMicroOps = 4; @@ -3092,13 +3079,6 @@ def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZ256rm(b?)i", "VROUNDPDYm", "VROUNDPSYm")>; -def SKXWriteResGroup193 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { - let Latency = 15; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SKXWriteResGroup193], (instregex "(V?)DPPDrmi")>; - def SKXWriteResGroup194 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort01,SKXPort23,SKXPort015]> { let Latency = 15; let NumMicroOps = 8; @@ -3244,13 +3224,6 @@ def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort015]> { def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)", "VPMULLQZrm(b?)")>; -def SKXWriteResGroup212 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { - let Latency = 19; - let NumMicroOps = 5; - let ResourceCycles = [1,1,3]; -} -def: InstRW<[SKXWriteResGroup212], (instregex "(V?)DPPSrmi")>; - def SKXWriteResGroup214 : SchedWriteRes<[]> { let Latency = 20; let NumMicroOps = 0; @@ -3275,13 +3248,6 @@ def SKXWriteResGroup216 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { } def: InstRW<[SKXWriteResGroup216], (instregex "(V?)DIVPD(Z128)?rm")>; -def SKXWriteResGroup217 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { - let Latency = 20; - let NumMicroOps = 5; - let ResourceCycles = [1,1,3]; -} -def: InstRW<[SKXWriteResGroup217], (instregex "VDPPSYrmi")>; - def SKXWriteResGroup218 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { let Latency = 20; let NumMicroOps = 5; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 7f6a38f6a4f..bc76bb48848 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -119,6 +119,9 @@ defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root e defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. defm WriteFMAS : X86SchedWritePair; // Fused Multiply Add (Scalar). defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM/ZMM). +defm WriteDPPD : X86SchedWritePair; // Floating point double dot product. +defm WriteDPPS : X86SchedWritePair; // Floating point single dot product. +defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM). defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs. defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals. defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM). @@ -243,6 +246,10 @@ def SchedWriteFMul : X86SchedWriteWidths<WriteFMul, WriteFMul, WriteFMulY, WriteFMulY>; def SchedWriteFMA : X86SchedWriteWidths<WriteFMAS, WriteFMA, WriteFMAY, WriteFMAY>; +def SchedWriteDPPD + : X86SchedWriteWidths<WriteDPPD, WriteDPPD, WriteDPPD, WriteDPPD>; +def SchedWriteDPPS + : X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSY>; def SchedWriteFDiv : X86SchedWriteWidths<WriteFDiv, WriteFDiv, WriteFDivY, WriteFDivY>; def SchedWriteFSqrt diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index da19ad7d508..c13f4473198 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -227,6 +227,9 @@ defm : AtomWriteResPair<WriteFVarShuffleY, [AtomPort0], [AtomPort0]>; // NOTE defm : AtomWriteResPair<WriteFMA, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair<WriteFMAS, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair<WriteFMAY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair<WriteDPPD, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair<WriteDPPS, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair<WriteDPPSY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair<WriteFBlend, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair<WriteFBlendY, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. defm : AtomWriteResPair<WriteFVarBlend, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom. diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index d930ed00d30..b1b7f154bc3 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -325,6 +325,9 @@ defm : JWriteResYMMPair<WriteFMulY, [JFPU1, JFPM], 2, [2,2], 2>; defm : JWriteResFpuPair<WriteFMA, [JFPU1, JFPM], 2>; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair<WriteFMAS, [JFPU1, JFPM], 2>; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair<WriteFMAY, [JFPU1, JFPM], 2>; // NOTE: Doesn't exist on Jaguar. +defm : JWriteResFpuPair<WriteDPPD, [JFPU1, JFPM, JFPA], 9, [1, 3, 3], 3>; +defm : JWriteResFpuPair<WriteDPPS, [JFPU1, JFPM, JFPA], 11, [1, 3, 3], 5>; +defm : JWriteResYMMPair<WriteDPPSY, [JFPU1, JFPM, JFPA], 12, [2, 6, 6], 10>; defm : JWriteResFpuPair<WriteFRcp, [JFPU1, JFPM], 2>; defm : JWriteResYMMPair<WriteFRcpY, [JFPU1, JFPM], 2, [2,2], 2>; defm : JWriteResFpuPair<WriteFRsqrt, [JFPU1, JFPM], 2>; @@ -483,38 +486,6 @@ defm : JWriteResFpuPair<WritePHAddY, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteCLMul, [JFPU0, JVIMUL], 2>; //////////////////////////////////////////////////////////////////////////////// -// SSE4.1 instructions. -//////////////////////////////////////////////////////////////////////////////// - -def JWriteDPPS: SchedWriteRes<[JFPU1, JFPM, JFPA]> { - let Latency = 11; - let ResourceCycles = [1, 3, 3]; - let NumMicroOps = 5; -} -def : InstRW<[JWriteDPPS], (instrs DPPSrri, VDPPSrri)>; - -def JWriteDPPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM, JFPA]> { - let Latency = 16; - let ResourceCycles = [1, 1, 3, 3]; - let NumMicroOps = 5; -} -def : InstRW<[JWriteDPPSLd], (instrs DPPSrmi, VDPPSrmi)>; - -def JWriteDPPD: SchedWriteRes<[JFPU1, JFPM, JFPA]> { - let Latency = 9; - let ResourceCycles = [1, 3, 3]; - let NumMicroOps = 3; -} -def : InstRW<[JWriteDPPD], (instrs DPPDrri, VDPPDrri)>; - -def JWriteDPPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM, JFPA]> { - let Latency = 14; - let ResourceCycles = [1, 1, 3, 3]; - let NumMicroOps = 3; -} -def : InstRW<[JWriteDPPDLd], (instrs DPPDrmi, VDPPDrmi)>; - -//////////////////////////////////////////////////////////////////////////////// // SSE4A instructions. //////////////////////////////////////////////////////////////////////////////// @@ -560,20 +531,6 @@ def : InstRW<[JWriteCVTPH2PSYLd], (instrs VCVTPH2PSYrm)>; // AVX instructions. //////////////////////////////////////////////////////////////////////////////// -def JWriteVDPPSY: SchedWriteRes<[JFPU1, JFPM, JFPA]> { - let Latency = 12; - let ResourceCycles = [2, 6, 6]; - let NumMicroOps = 10; -} -def : InstRW<[JWriteVDPPSY], (instrs VDPPSYrri)>; - -def JWriteVDPPSYLd: SchedWriteRes<[JLAGU, JFPU1, JFPM, JFPA]> { - let Latency = 17; - let ResourceCycles = [2, 2, 6, 6]; - let NumMicroOps = 10; -} -def : InstRW<[JWriteVDPPSYLd, ReadAfterLd], (instrs VDPPSYrmi)>; - def JWriteVMULYPD: SchedWriteRes<[JFPU1, JFPM]> { let Latency = 4; let ResourceCycles = [2, 4]; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 51ced28e901..46f5f823fa5 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -144,6 +144,9 @@ defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>; defm : SLMWriteResPair<WriteFRsqrtY,[SLM_FPC_RSV0], 5>; defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0], 15>; defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0], 15>; +defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>; +defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>; +defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteCvtF2I, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteCvtF2F, [SLM_FPC_RSV01], 4>; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index a6ba4cbb015..c15b540764a 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -1540,17 +1540,19 @@ def : InstRW<[ZnWriteROUNDm], (instregex "(V?)ROUND(S|P)(S|D)(Y?)m")>; // DPPS. // x,x,i / v,v,v,i. -def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPS(Y?)rri")>; +def : SchedAlias<WriteDPPS, ZnWriteMicrocoded>; +def : SchedAlias<WriteDPPSY, ZnWriteMicrocoded>; // x,m,i / v,v,m,i. -def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPS(Y?)rmi")>; +def : SchedAlias<WriteDPPSLd, ZnWriteMicrocoded>; +def : SchedAlias<WriteDPPSYLd,ZnWriteMicrocoded>; // DPPD. // x,x,i. -def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPDrri")>; +def : SchedAlias<WriteDPPD, ZnWriteMicrocoded>; // x,m,i. -def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPDrmi")>; +def : SchedAlias<WriteDPPDLd, ZnWriteMicrocoded>; // VSQRTPS. // y,y. |