diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-04-02 05:33:28 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-04-02 05:33:28 +0000 |
| commit | 8104f266a4e6f39e9c7913b7d44e84947df756be (patch) | |
| tree | 39da1df5d75b04891314600570a1b2f01053acd0 /llvm/lib | |
| parent | dc74094398e63c8c9d299ba03bde299bff81bf0f (diff) | |
| download | bcm5719-llvm-8104f266a4e6f39e9c7913b7d44e84947df756be.tar.gz bcm5719-llvm-8104f266a4e6f39e9c7913b7d44e84947df756be.zip | |
[X86] Correct the throughput for divide instructions in Sandy Bridge/Haswell/Broadwell/Skylake scheduler models.
Fixes most of PR36898. Still need to fix the 512-bit instructions, but Agner's tables don't have those.
llvm-svn: 328960
Diffstat (limited to 'llvm/lib')
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 114 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 74 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 42 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 120 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 188 |
5 files changed, 318 insertions, 220 deletions
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 3de673259ee..8832d5a6716 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -67,7 +67,9 @@ def BWPortAny : ProcResGroup<[BWPort0, BWPort1, BWPort2, BWPort3, BWPort4, } // Integer division issued on port 0. -def BWDivider : ProcResource<1>; // Integer division issued on port 0. +def BWDivider : ProcResource<1>; +// FP division and sqrt on port 0. +def BWFPDivider : ProcResource<1>; // Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5 // cycles after the memory operand. @@ -2260,13 +2262,19 @@ def BWWriteResGroup121 : SchedWriteRes<[BWPort1,BWPort23,BWPort06,BWPort0156]> { } def: InstRW<[BWWriteResGroup121], (instrs IMUL32m, MUL32m, MULX32rm)>; -def BWWriteResGroup122 : SchedWriteRes<[BWPort0]> { +def BWWriteResGroup122 : SchedWriteRes<[BWPort0,BWFPDivider]> { let Latency = 11; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,5]; +} +def: InstRW<[BWWriteResGroup122], (instregex "(V?)DIVPSrr")>; + +def BWWriteResGroup122_1 : SchedWriteRes<[BWPort0,BWFPDivider]> { + let Latency = 11; + let NumMicroOps = 1; + let ResourceCycles = [1,3]; // Really 2.5 cycle throughput } -def: InstRW<[BWWriteResGroup122], (instregex "(V?)DIVPSrr", - "(V?)DIVSSrr")>; +def: InstRW<[BWWriteResGroup122_1], (instregex "(V?)DIVSSrr")>; def BWWriteResGroup123 : SchedWriteRes<[BWPort0,BWPort23]> { let Latency = 11; @@ -2381,28 +2389,40 @@ def BWWriteResGroup136 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> { } def: InstRW<[BWWriteResGroup136], (instregex "(V?)MPSADBWrmi")>; -def BWWriteResGroup137 : SchedWriteRes<[BWPort0]> { +def BWWriteResGroup137 : SchedWriteRes<[BWPort0,BWFPDivider]> { let Latency = 11; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,7]; +} +def: InstRW<[BWWriteResGroup137], (instregex "(V?)SQRTPSr")>; + +def BWWriteResGroup137_1 : SchedWriteRes<[BWPort0,BWFPDivider]> { + let Latency = 11; + let NumMicroOps = 1; + let ResourceCycles = [1,4]; } -def: InstRW<[BWWriteResGroup137], (instregex "(V?)SQRTPSr", - "(V?)SQRTSSr")>; +def: InstRW<[BWWriteResGroup137_1], (instregex "(V?)SQRTSSr")>; def BWWriteResGroup138 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> { let Latency = 13; let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; + let ResourceCycles = [1,2,1,7]; } def: InstRW<[BWWriteResGroup138], (instregex "VMPSADBWYrmi")>; -def BWWriteResGroup139 : SchedWriteRes<[BWPort0]> { +def BWWriteResGroup139 : SchedWriteRes<[BWPort0,BWFPDivider]> { let Latency = 14; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,8]; +} +def: InstRW<[BWWriteResGroup139], (instregex "(V?)DIVPDrr")>; + +def BWWriteResGroup139_1 : SchedWriteRes<[BWPort0,BWFPDivider]> { + let Latency = 14; + let NumMicroOps = 1; + let ResourceCycles = [1,4]; } -def: InstRW<[BWWriteResGroup139], (instregex "(V?)DIVPDrr", - "(V?)DIVSDrr")>; +def: InstRW<[BWWriteResGroup139_1], (instregex "(V?)DIVSDrr")>; def BWWriteResGroup141 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> { let Latency = 14; @@ -2463,10 +2483,10 @@ def BWWriteResGroup149 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPo } def: InstRW<[BWWriteResGroup149], (instregex "RCL(8|16|32|64)mCL")>; -def BWWriteResGroup150 : SchedWriteRes<[BWPort0,BWPort23]> { +def BWWriteResGroup150 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> { let Latency = 16; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,5]; } def: InstRW<[BWWriteResGroup150], (instregex "(V?)DIVPSrm", "(V?)DIVSSrm")>; @@ -2492,10 +2512,10 @@ def BWWriteResGroup154 : SchedWriteRes<[BWPort5]> { } def: InstRW<[BWWriteResGroup154], (instrs VZEROALL)>; -def BWWriteResGroup155 : SchedWriteRes<[BWPort0,BWPort015]> { +def BWWriteResGroup155 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> { let Latency = 17; let NumMicroOps = 3; - let ResourceCycles = [2,1]; + let ResourceCycles = [2,1,10]; } def: InstRW<[BWWriteResGroup155], (instregex "VDIVPSYrr")>; @@ -2507,10 +2527,10 @@ def BWWriteResGroup156 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> { def: InstRW<[BWWriteResGroup156], (instregex "VRCPPSYm", "VRSQRTPSYm")>; -def BWWriteResGroup157 : SchedWriteRes<[BWPort0,BWPort23]> { +def BWWriteResGroup157 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> { let Latency = 16; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,7]; } def: InstRW<[BWWriteResGroup157], (instregex "(V?)SQRTPSm", "(V?)SQRTSSm")>; @@ -2530,10 +2550,10 @@ def BWWriteResGroup160 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPo } def: InstRW<[BWWriteResGroup160], (instregex "RCR(8|16|32|64)mCL")>; -def BWWriteResGroup161 : SchedWriteRes<[BWPort0,BWPort23]> { +def BWWriteResGroup161 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> { let Latency = 19; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,8]; } def: InstRW<[BWWriteResGroup161], (instregex "(V?)DIVPDrm", "(V?)DIVSDrm")>; @@ -2570,13 +2590,19 @@ def: InstRW<[BWWriteResGroup167], (instregex "INSB", "INSL", "INSW")>; -def BWWriteResGroup168 : SchedWriteRes<[BWPort0]> { +def BWWriteResGroup168 : SchedWriteRes<[BWPort0,BWFPDivider]> { let Latency = 16; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,14]; +} +def: InstRW<[BWWriteResGroup168], (instregex "(V?)SQRTPDr")>; + +def BWWriteResGroup168_1 : SchedWriteRes<[BWPort0,BWFPDivider]> { + let Latency = 16; + let NumMicroOps = 1; + let ResourceCycles = [1,8]; } -def: InstRW<[BWWriteResGroup168], (instregex "(V?)SQRTPDr", - "(V?)SQRTSDr")>; +def: InstRW<[BWWriteResGroup168_1], (instregex "(V?)SQRTSDr")>; def BWWriteResGroup169 : SchedWriteRes<[BWPort0,BWPort23]> { let Latency = 21; @@ -2586,10 +2612,10 @@ def BWWriteResGroup169 : SchedWriteRes<[BWPort0,BWPort23]> { def: InstRW<[BWWriteResGroup169], (instregex "DIV_F32m", "DIV_F64m")>; -def BWWriteResGroup170 : SchedWriteRes<[BWPort0,BWPort015]> { +def BWWriteResGroup170 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> { let Latency = 21; let NumMicroOps = 3; - let ResourceCycles = [2,1]; + let ResourceCycles = [2,1,14]; } def: InstRW<[BWWriteResGroup170], (instregex "VSQRTPSYr")>; @@ -2607,17 +2633,17 @@ def BWWriteResGroup172 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> { } def: InstRW<[BWWriteResGroup172], (instregex "POPF64")>; -def BWWriteResGroup173 : SchedWriteRes<[BWPort0,BWPort015]> { +def BWWriteResGroup173 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> { let Latency = 23; let NumMicroOps = 3; - let ResourceCycles = [2,1]; + let ResourceCycles = [2,1,16]; } def: InstRW<[BWWriteResGroup173], (instregex "VDIVPDYrr")>; -def BWWriteResGroup174 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> { +def BWWriteResGroup174 : SchedWriteRes<[BWPort0,BWPort23,BWPort015,BWFPDivider]> { let Latency = 23; let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; + let ResourceCycles = [2,1,1,10]; } def: InstRW<[BWWriteResGroup174], (instregex "VDIVPSYrm")>; @@ -2636,10 +2662,10 @@ def BWWriteResGroup177 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> { def: InstRW<[BWWriteResGroup177], (instregex "DIV_FI16m", "DIV_FI32m")>; -def BWWriteResGroup179 : SchedWriteRes<[BWPort0,BWPort23]> { +def BWWriteResGroup179 : SchedWriteRes<[BWPort0,BWPort23,BWFPDivider]> { let Latency = 21; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,14]; } def: InstRW<[BWWriteResGroup179], (instregex "(V?)SQRTPDm", "(V?)SQRTSDm")>; @@ -2652,10 +2678,10 @@ def BWWriteResGroup180 : SchedWriteRes<[BWPort0,BWPort23]> { def: InstRW<[BWWriteResGroup180], (instregex "DIVR_F32m", "DIVR_F64m")>; -def BWWriteResGroup181 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> { +def BWWriteResGroup181 : SchedWriteRes<[BWPort0,BWPort23,BWPort015,BWFPDivider]> { let Latency = 27; let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; + let ResourceCycles = [2,1,1,14]; } def: InstRW<[BWWriteResGroup181], (instregex "VSQRTPSYm")>; @@ -2667,10 +2693,10 @@ def BWWriteResGroup182 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> { def: InstRW<[BWWriteResGroup182], (instregex "DIVR_FI16m", "DIVR_FI32m")>; -def BWWriteResGroup183 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> { +def BWWriteResGroup183 : SchedWriteRes<[BWPort0,BWPort23,BWPort015,BWFPDivider]> { let Latency = 29; let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; + let ResourceCycles = [2,1,1,16]; } def: InstRW<[BWWriteResGroup183], (instregex "VDIVPDYrm")>; @@ -2745,10 +2771,10 @@ def BWWriteResGroup187 : SchedWriteRes<[BWPort01,BWPort15,BWPort015,BWPort0156]> } def: InstRW<[BWWriteResGroup187], (instregex "MMX_EMMS")>; -def BWWriteResGroup189 : SchedWriteRes<[BWPort0,BWPort015]> { - let Latency = 34; +def BWWriteResGroup189 : SchedWriteRes<[BWPort0,BWPort015,BWFPDivider]> { + let Latency = 29; let NumMicroOps = 3; - let ResourceCycles = [2,1]; + let ResourceCycles = [2,1,28]; } def: InstRW<[BWWriteResGroup189], (instregex "VSQRTPDYr")>; @@ -2782,10 +2808,10 @@ def BWWriteResGroup194 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort237,BWPor def: InstRW<[BWWriteResGroup194], (instregex "OUT(8|16|32)ir", "OUT(8|16|32)rr")>; -def BWWriteResGroup195 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> { - let Latency = 40; +def BWWriteResGroup195 : SchedWriteRes<[BWPort0,BWPort23,BWPort015,BWFPDivider]> { + let Latency = 35; let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; + let ResourceCycles = [2,1,1,28]; } def: InstRW<[BWWriteResGroup195], (instregex "VSQRTPDYm")>; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 8ab64f00611..622a90a4227 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -69,6 +69,8 @@ def HWPortAny : ProcResGroup<[HWPort0, HWPort1, HWPort2, HWPort3, HWPort4, // Integer division issued on port 0. def HWDivider : ProcResource<1>; +// FP division and sqrt on port 0. +def HWFPDivider : ProcResource<1>; // Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5 // cycles after the memory operand. @@ -2394,17 +2396,17 @@ def: InstRW<[HWWriteResGroup91], (instregex "MMX_PMADDUBSWrm", "(V?)RCPSSm", "(V?)RSQRTSSm")>; -def HWWriteResGroup91_1 : SchedWriteRes<[HWPort0,HWPort23]> { +def HWWriteResGroup91_1 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { let Latency = 16; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,7]; } def: InstRW<[HWWriteResGroup91_1], (instregex "(V?)SQRTSSm")>; -def HWWriteResGroup91_4 : SchedWriteRes<[HWPort0,HWPort23]> { +def HWWriteResGroup91_4 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { let Latency = 18; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,7]; } def: InstRW<[HWWriteResGroup91_4], (instregex "(V?)DIVSSrm")>; @@ -2695,10 +2697,10 @@ def HWWriteResGroup120 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPo } def: InstRW<[HWWriteResGroup120], (instregex "RCL(8|16|32|64)mCL")>; -def HWWriteResGroup121 : SchedWriteRes<[HWPort0]> { +def HWWriteResGroup121 : SchedWriteRes<[HWPort0,HWFPDivider]> { let Latency = 13; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,7]; } def: InstRW<[HWWriteResGroup121], (instregex "(V?)DIVPSrr", "(V?)DIVSSrr")>; @@ -2748,18 +2750,18 @@ def HWWriteResGroup132 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06,HWPo } def: InstRW<[HWWriteResGroup132], (instregex "CMPXCHG8B")>; -def HWWriteResGroup133 : SchedWriteRes<[HWPort0]> { +def HWWriteResGroup133 : SchedWriteRes<[HWPort0,HWFPDivider]> { let Latency = 11; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,7]; } def: InstRW<[HWWriteResGroup133], (instregex "(V?)SQRTPSr", "(V?)SQRTSSr")>; -def HWWriteResGroup134 : SchedWriteRes<[HWPort0,HWPort23]> { +def HWWriteResGroup134 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { let Latency = 19; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,7]; } def: InstRW<[HWWriteResGroup134], (instregex "(V?)DIVPSrm")>; @@ -2770,10 +2772,10 @@ def HWWriteResGroup135 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPo } def: InstRW<[HWWriteResGroup135], (instregex "RCR(8|16|32|64)mCL")>; -def HWWriteResGroup138 : SchedWriteRes<[HWPort0,HWPort23]> { +def HWWriteResGroup138 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { let Latency = 17; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,7]; } def: InstRW<[HWWriteResGroup138], (instregex "(V?)SQRTPSm")>; @@ -2864,9 +2866,15 @@ def HWWriteResGroup154 : SchedWriteRes<[HWPort0]> { } def: InstRW<[HWWriteResGroup154], (instregex "DIV_FPrST0", "DIV_FST0r", - "DIV_FrST0", - "(V?)DIVPDrr", - "(V?)DIVSDrr")>; + "DIV_FrST0")>; + +def HWWriteResGroup154_1 : SchedWriteRes<[HWPort0,HWFPDivider]> { + let Latency = 20; + let NumMicroOps = 1; + let ResourceCycles = [1,14]; +} +def: InstRW<[HWWriteResGroup154_1], (instregex "(V?)DIVPDrr", + "(V?)DIVSDrr")>; def HWWriteResGroup155 : SchedWriteRes<[HWPort0,HWPort23]> { let Latency = 27; @@ -2876,31 +2884,31 @@ def HWWriteResGroup155 : SchedWriteRes<[HWPort0,HWPort23]> { def: InstRW<[HWWriteResGroup155], (instregex "DIVR_F32m", "DIVR_F64m")>; -def HWWriteResGroup155_1 : SchedWriteRes<[HWPort0,HWPort23]> { +def HWWriteResGroup155_1 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { let Latency = 26; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,14]; } def: InstRW<[HWWriteResGroup155_1], (instregex "(V?)DIVPDrm")>; -def HWWriteResGroup155_2 : SchedWriteRes<[HWPort0,HWPort23]> { +def HWWriteResGroup155_2 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { let Latency = 21; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,14]; } def: InstRW<[HWWriteResGroup155_2], (instregex "(V?)SQRTSDm")>; -def HWWriteResGroup155_3 : SchedWriteRes<[HWPort0,HWPort23]> { +def HWWriteResGroup155_3 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { let Latency = 22; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,14]; } def: InstRW<[HWWriteResGroup155_3], (instregex "(V?)SQRTPDm")>; -def HWWriteResGroup155_4 : SchedWriteRes<[HWPort0,HWPort23]> { +def HWWriteResGroup155_4 : SchedWriteRes<[HWPort0,HWPort23,HWFPDivider]> { let Latency = 25; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,14]; } def: InstRW<[HWWriteResGroup155_4], (instregex "(V?)DIVSDrm")>; @@ -2911,26 +2919,26 @@ def HWWriteResGroup156 : SchedWriteRes<[HWPort5,HWPort6,HWPort0156]> { } def: InstRW<[HWWriteResGroup156], (instregex "MWAITrr")>; -def HWWriteResGroup157 : SchedWriteRes<[HWPort0]> { +def HWWriteResGroup157 : SchedWriteRes<[HWPort0,HWFPDivider]> { let Latency = 16; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,14]; } def: InstRW<[HWWriteResGroup157], (instregex "(V?)SQRTPDr", "(V?)SQRTSDr")>; -def HWWriteResGroup159 : SchedWriteRes<[HWPort0,HWPort015]> { +def HWWriteResGroup159 : SchedWriteRes<[HWPort0,HWPort15,HWFPDivider]> { let Latency = 21; let NumMicroOps = 3; - let ResourceCycles = [2,1]; + let ResourceCycles = [2,1,14]; } def: InstRW<[HWWriteResGroup159], (instregex "VDIVPSYrr", "VSQRTPSYr")>; -def HWWriteResGroup160 : SchedWriteRes<[HWPort0,HWPort23,HWPort015]> { +def HWWriteResGroup160 : SchedWriteRes<[HWPort0,HWPort23,HWPort15,HWFPDivider]> { let Latency = 28; let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; + let ResourceCycles = [2,1,1,14]; } def: InstRW<[HWWriteResGroup160], (instregex "VDIVPSYrm", "VSQRTPSYm")>; @@ -3005,18 +3013,18 @@ def HWWriteResGroup172 : SchedWriteRes<[HWPort01,HWPort15,HWPort015,HWPort0156]> } def: InstRW<[HWWriteResGroup172], (instregex "MMX_EMMS")>; -def HWWriteResGroup173 : SchedWriteRes<[HWPort0,HWPort015]> { +def HWWriteResGroup173 : SchedWriteRes<[HWPort0,HWPort15,HWFPDivider]> { let Latency = 35; let NumMicroOps = 3; - let ResourceCycles = [2,1]; + let ResourceCycles = [2,1,28]; } def: InstRW<[HWWriteResGroup173], (instregex "VDIVPDYrr", "VSQRTPDYr")>; -def HWWriteResGroup174 : SchedWriteRes<[HWPort0,HWPort23,HWPort015]> { +def HWWriteResGroup174 : SchedWriteRes<[HWPort0,HWPort23,HWPort15,HWFPDivider]> { let Latency = 42; let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; + let ResourceCycles = [2,1,1,28]; } def: InstRW<[HWWriteResGroup174], (instregex "VDIVPDYrm", "VSQRTPDYm")>; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 85d9a89cee4..d9b2a640cc1 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -60,6 +60,8 @@ def SBPortAny : ProcResGroup<[SBPort0, SBPort1, SBPort23, SBPort4, SBPort5]> { // Integer division issued on port 0. def SBDivider : ProcResource<1>; +// FP division and sqrt on port 0. +def SBFPDivider : ProcResource<1>; // Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4 // cycles after the memory operand. @@ -1931,10 +1933,10 @@ def: InstRW<[SBWriteResGroup114], (instregex "ADD_FI16m", "SUB_FI16m", "SUB_FI32m")>; -def SBWriteResGroup116 : SchedWriteRes<[SBPort0]> { +def SBWriteResGroup116 : SchedWriteRes<[SBPort0,SBFPDivider]> { let Latency = 14; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,14]; } def: InstRW<[SBWriteResGroup116], (instregex "(V?)SQRTSSr", "(V?)DIVPSrr", @@ -1964,36 +1966,36 @@ def SBWriteResGroup120 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> { } def: InstRW<[SBWriteResGroup120], (instregex "(V?)DPPDrmi")>; -def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23]> { +def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23,SBFPDivider]> { let Latency = 20; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,14]; } def: InstRW<[SBWriteResGroup123], (instregex "(V?)SQRTSSm", "(V?)DIVPSrm", "(V?)DIVSSrm", "(V?)SQRTPSm")>; -def SBWriteResGroup124 : SchedWriteRes<[SBPort0]> { +def SBWriteResGroup124 : SchedWriteRes<[SBPort0,SBFPDivider]> { let Latency = 21; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,21]; } def: InstRW<[SBWriteResGroup124], (instregex "(V?)SQRTPDr", "(V?)SQRTSDr")>; -def SBWriteResGroup125 : SchedWriteRes<[SBPort0,SBPort23]> { +def SBWriteResGroup125 : SchedWriteRes<[SBPort0,SBPort23,SBFPDivider]> { let Latency = 27; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,21]; } def: InstRW<[SBWriteResGroup125], (instregex "(V?)SQRTPDm", "(V?)SQRTSDm")>; -def SBWriteResGroup126 : SchedWriteRes<[SBPort0]> { +def SBWriteResGroup126 : SchedWriteRes<[SBPort0,SBFPDivider]> { let Latency = 22; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,22]; } def: InstRW<[SBWriteResGroup126], (instregex "(V?)DIVPDrr", "(V?)DIVSDrr")>; @@ -2010,18 +2012,18 @@ def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FPrST0", "DIV_FST0r", "DIV_FrST0")>; -def SBWriteResGroup128 : SchedWriteRes<[SBPort0,SBPort23]> { +def SBWriteResGroup128 : SchedWriteRes<[SBPort0,SBPort23,SBFPDivider]> { let Latency = 28; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,22]; } def: InstRW<[SBWriteResGroup128], (instregex "(V?)DIVPDrm", "(V?)DIVSDrm")>; -def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort05]> { +def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort05,SBFPDivider]> { let Latency = 29; let NumMicroOps = 3; - let ResourceCycles = [2,1]; + let ResourceCycles = [2,1,28]; } def: InstRW<[SBWriteResGroup129], (instregex "VDIVPSYrr", "VSQRTPSYr")>; @@ -2046,26 +2048,26 @@ def: InstRW<[SBWriteResGroup131], (instregex "DIVR_FI16m", "DIV_FI16m", "DIV_FI32m")>; -def SBWriteResGroup132 : SchedWriteRes<[SBPort0,SBPort23,SBPort05]> { +def SBWriteResGroup132 : SchedWriteRes<[SBPort0,SBPort23,SBPort05,SBFPDivider]> { let Latency = 36; let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; + let ResourceCycles = [2,1,1,28]; } def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm", "VSQRTPSYm")>; -def SBWriteResGroup133 : SchedWriteRes<[SBPort0,SBPort05]> { +def SBWriteResGroup133 : SchedWriteRes<[SBPort0,SBPort05,SBFPDivider]> { let Latency = 45; let NumMicroOps = 3; - let ResourceCycles = [2,1]; + let ResourceCycles = [2,1,44]; } def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr", "VSQRTPDYr")>; -def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort05]> { +def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort05,SBFPDivider]> { let Latency = 52; let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; + let ResourceCycles = [2,1,1,44]; } def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm", "VSQRTPDYm")>; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 7e9f3c425fe..465afb30756 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -62,6 +62,8 @@ def SKLPort056 : ProcResGroup<[SKLPort0, SKLPort5, SKLPort6]>; def SKLPort0156: ProcResGroup<[SKLPort0, SKLPort1, SKLPort5, SKLPort6]>; def SKLDivider : ProcResource<1>; // Integer division issued on port 0. +// FP division and sqrt on port 0. +def SKLFPDivider : ProcResource<1>; // 60 Entry Unified Scheduler def SKLPortAny : ProcResGroup<[SKLPort0, SKLPort1, SKLPort2, SKLPort3, SKLPort4, @@ -2343,14 +2345,21 @@ def SKLWriteResGroup144 : SchedWriteRes<[SKLPort05,SKLPort0156]> { } def: InstRW<[SKLWriteResGroup144], (instregex "MMX_EMMS")>; -def SKLWriteResGroup145 : SchedWriteRes<[SKLPort0]> { +def SKLWriteResGroup145 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { let Latency = 11; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,3]; } -def: InstRW<[SKLWriteResGroup145], (instregex "(V?)DIVPS(Y?)rr", +def: InstRW<[SKLWriteResGroup145], (instregex "(V?)DIVPSrr", "(V?)DIVSSrr")>; +def SKLWriteResGroup145_1 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { + let Latency = 11; + let NumMicroOps = 1; + let ResourceCycles = [1,5]; +} +def: InstRW<[SKLWriteResGroup145_1], (instregex "VDIVPSYrr")>; + def SKLWriteResGroup146 : SchedWriteRes<[SKLPort0,SKLPort23]> { let Latency = 11; let NumMicroOps = 2; @@ -2468,14 +2477,21 @@ def SKLWriteResGroup156 : SchedWriteRes<[SKLPort06,SKLPort0156]> { } def: InstRW<[SKLWriteResGroup156], (instrs LOOPE, LOOPNE)>; -def SKLWriteResGroup157 : SchedWriteRes<[SKLPort0]> { +def SKLWriteResGroup157 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { let Latency = 12; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,3]; } -def: InstRW<[SKLWriteResGroup157], (instregex "(V?)SQRTPS(Y?)r", +def: InstRW<[SKLWriteResGroup157], (instregex "(V?)SQRTPSr", "(V?)SQRTSSr")>; +def SKLWriteResGroup158 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { + let Latency = 12; + let NumMicroOps = 1; + let ResourceCycles = [1,6]; +} +def: InstRW<[SKLWriteResGroup158], (instregex "VSQRTPSYr")>; + def SKLWriteResGroup159 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> { let Latency = 12; let NumMicroOps = 4; @@ -2529,14 +2545,21 @@ def: InstRW<[SKLWriteResGroup165], (instregex "VHADDPDYrm", "VHSUBPDYrm", "VHSUBPSYrm")>; -def SKLWriteResGroup166 : SchedWriteRes<[SKLPort0]> { +def SKLWriteResGroup166 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { let Latency = 14; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,3]; } -def: InstRW<[SKLWriteResGroup166], (instregex "(V?)DIVPD(Y?)rr", +def: InstRW<[SKLWriteResGroup166], (instregex "(V?)DIVPDrr", "(V?)DIVSDrr")>; +def SKLWriteResGroup166_1 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { + let Latency = 14; + let NumMicroOps = 1; + let ResourceCycles = [1,5]; +} +def: InstRW<[SKLWriteResGroup166_1], (instregex "VDIVPDYrr")>; + def SKLWriteResGroup168 : SchedWriteRes<[SKLPort23,SKLPort01]> { let Latency = 14; let NumMicroOps = 3; @@ -2600,10 +2623,10 @@ def SKLWriteResGroup174 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06 } def: InstRW<[SKLWriteResGroup174], (instregex "RCL(8|16|32|64)mCL")>; -def SKLWriteResGroup175 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup175 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { let Latency = 16; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,3]; } def: InstRW<[SKLWriteResGroup175], (instregex "(V?)DIVSSrm")>; @@ -2621,13 +2644,19 @@ def SKLWriteResGroup178 : SchedWriteRes<[SKLPort0156]> { } def: InstRW<[SKLWriteResGroup178], (instregex "VZEROALL")>; -def SKLWriteResGroup179 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup179 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { let Latency = 17; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,5]; } -def: InstRW<[SKLWriteResGroup179], (instregex "(V?)DIVPSrm", - "(V?)SQRTSSm")>; +def: InstRW<[SKLWriteResGroup179], (instregex "(V?)DIVPSrm")>; + +def SKLWriteResGroup179_1 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { + let Latency = 17; + let NumMicroOps = 2; + let ResourceCycles = [1,1,3]; +} +def: InstRW<[SKLWriteResGroup179_1], (instregex "(V?)SQRTSSm")>; def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> { let Latency = 17; @@ -2636,21 +2665,34 @@ def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKL } def: InstRW<[SKLWriteResGroup180], (instregex "XCH_F")>; -def SKLWriteResGroup181 : SchedWriteRes<[SKLPort0]> { +def SKLWriteResGroup181 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { let Latency = 18; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,6]; } -def: InstRW<[SKLWriteResGroup181], (instregex "(V?)SQRTPD(Y?)r", +def: InstRW<[SKLWriteResGroup181], (instregex "(V?)SQRTPDr", "(V?)SQRTSDr")>; -def SKLWriteResGroup182 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup181_1 : SchedWriteRes<[SKLPort0,SKLFPDivider]> { + let Latency = 18; + let NumMicroOps = 1; + let ResourceCycles = [1,12]; +} +def: InstRW<[SKLWriteResGroup181_1], (instregex "VSQRTPDYr")>; + +def SKLWriteResGroup182 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { let Latency = 18; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,5]; +} +def: InstRW<[SKLWriteResGroup182], (instregex "VDIVPSYrm")>; + +def SKLWriteResGroup183 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { + let Latency = 18; + let NumMicroOps = 2; + let ResourceCycles = [1,1,3]; } -def: InstRW<[SKLWriteResGroup182], (instregex "VDIVPSYrm", - "(V?)SQRTPSm")>; +def: InstRW<[SKLWriteResGroup183], (instregex "(V?)SQRTPSm")>; def SKLWriteResGroup184 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> { let Latency = 18; @@ -2666,13 +2708,19 @@ def SKLWriteResGroup185 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06 } def: InstRW<[SKLWriteResGroup185], (instregex "RCR(8|16|32|64)mCL")>; -def SKLWriteResGroup186 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup186 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { let Latency = 19; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,4]; +} +def: InstRW<[SKLWriteResGroup186], (instregex "(V?)DIVSDrm")>; + +def SKLWriteResGroup186_1 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { + let Latency = 19; + let NumMicroOps = 2; + let ResourceCycles = [1,1,6]; } -def: InstRW<[SKLWriteResGroup186], (instregex "(V?)DIVSDrm", - "VSQRTPSYm")>; +def: InstRW<[SKLWriteResGroup186_1], (instregex "VSQRTPSYm")>; def SKLWriteResGroup187 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> { let Latency = 19; @@ -2690,10 +2738,10 @@ def: InstRW<[SKLWriteResGroup189], (instregex "DIV_FPrST0", "DIV_FST0r", "DIV_FrST0")>; -def SKLWriteResGroup190 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup190 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { let Latency = 20; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,4]; } def: InstRW<[SKLWriteResGroup190], (instregex "(V?)DIVPDrm")>; @@ -2720,10 +2768,10 @@ def SKLWriteResGroup193 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort0156]> { } def: InstRW<[SKLWriteResGroup193], (instregex "MWAITrr")>; -def SKLWriteResGroup195 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup195 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { let Latency = 21; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,8]; } def: InstRW<[SKLWriteResGroup195], (instregex "VDIVPDYrm")>; @@ -2763,10 +2811,10 @@ def: InstRW<[SKLWriteResGroup196_2], (instrs VGATHERDPSYrm, VPGATHERQQYrm, VGATHERDPDYrm)>; -def SKLWriteResGroup197 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup197 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { let Latency = 23; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,6]; } def: InstRW<[SKLWriteResGroup197], (instregex "(V?)SQRTSDm")>; @@ -2777,17 +2825,17 @@ def SKLWriteResGroup198 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort5,SKLPort23,SK } def: InstRW<[SKLWriteResGroup198], (instregex "CMPXCHG16B")>; -def SKLWriteResGroup199 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup199 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { let Latency = 24; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,6]; } def: InstRW<[SKLWriteResGroup199], (instregex "(V?)SQRTPDm")>; -def SKLWriteResGroup201 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup201 : SchedWriteRes<[SKLPort0,SKLPort23,SKLFPDivider]> { let Latency = 25; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,12]; } def: InstRW<[SKLWriteResGroup201], (instregex "VSQRTPDYm")>; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 5597ba9e826..610fb68a2bb 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -62,6 +62,8 @@ def SKXPort056 : ProcResGroup<[SKXPort0, SKXPort5, SKXPort6]>; def SKXPort0156: ProcResGroup<[SKXPort0, SKXPort1, SKXPort5, SKXPort6]>; def SKXDivider : ProcResource<1>; // Integer division issued on port 0. +// FP division and sqrt on port 0. +def SKXFPDivider : ProcResource<1>; // 60 Entry Unified Scheduler def SKXPortAny : ProcResGroup<[SKXPort0, SKXPort1, SKXPort2, SKXPort3, SKXPort4, @@ -5044,19 +5046,20 @@ def SKXWriteResGroup158 : SchedWriteRes<[SKXPort05,SKXPort0156]> { } def: InstRW<[SKXWriteResGroup158], (instregex "MMX_EMMS")>; -def SKXWriteResGroup159 : SchedWriteRes<[SKXPort0]> { +def SKXWriteResGroup159 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { let Latency = 11; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,3]; +} +def: InstRW<[SKXWriteResGroup159], (instregex "(V?)DIVPS(Z128)?rr", + "(V?)DIVSS(Z?)rr")>; + +def SKXWriteResGroup159_1 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { + let Latency = 11; + let NumMicroOps = 1; + let ResourceCycles = [1,5]; } -def: InstRW<[SKXWriteResGroup159], (instregex "DIVPSrr", - "DIVSSrr", - "VDIVPSYrr", - "VDIVPSZ128rr(b?)", - "VDIVPSZ256rr(b?)", - "VDIVPSrr", - "VDIVSSZrr", - "VDIVSSrr")>; +def: InstRW<[SKXWriteResGroup159_1], (instregex "VDIVPS(Y|Z256)rr")>; def SKXWriteResGroup160 : SchedWriteRes<[SKXPort0,SKXPort23]> { let Latency = 11; @@ -5325,19 +5328,20 @@ def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> { } def: InstRW<[SKXWriteResGroup171], (instrs LOOPE, LOOPNE)>; -def SKXWriteResGroup172 : SchedWriteRes<[SKXPort0]> { +def SKXWriteResGroup172 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { let Latency = 12; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,3]; +} +def: InstRW<[SKXWriteResGroup172], (instregex "(V?)SQRTPS(Z128)?r", + "(V?)SQRTSS(Z?)r")>; + +def SKXWriteResGroup173 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { + let Latency = 12; + let NumMicroOps = 1; + let ResourceCycles = [1,6]; } -def: InstRW<[SKXWriteResGroup172], (instregex "SQRTPSr", - "SQRTSSr", - "VSQRTPSYr", - "VSQRTPSZ128r", - "VSQRTPSZ256r", - "VSQRTPSr", - "VSQRTSSZr", - "VSQRTSSr")>; +def: InstRW<[SKXWriteResGroup173], (instregex "VSQRTPS(Y|Z256)r")>; def SKXWriteResGroup174 : SchedWriteRes<[SKXPort015]> { let Latency = 12; @@ -5434,19 +5438,20 @@ def: InstRW<[SKXWriteResGroup183], (instregex "VHADDPDYrm", "VPERMI2W128rm(b?)", "VPERMT2W128rm(b?)")>; -def SKXWriteResGroup184 : SchedWriteRes<[SKXPort0]> { +def SKXWriteResGroup184 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { let Latency = 14; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,3]; +} +def: InstRW<[SKXWriteResGroup184], (instregex "(V?)DIVPDrr", + "(V?)DIVSD(Z?)rr")>; + +def SKXWriteResGroup184_1 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { + let Latency = 14; + let NumMicroOps = 1; + let ResourceCycles = [1,5]; } -def: InstRW<[SKXWriteResGroup184], (instregex "DIVPDrr", - "DIVSDrr", - "VDIVPDYrr", - "VDIVPDZ128rr(b?)", - "VDIVPDZ256rr(b?)", - "VDIVPDrr", - "VDIVSDZrr", - "VDIVSDrr")>; +def: InstRW<[SKXWriteResGroup184_1], (instregex "VDIVPD(Y|Z256)rr")>; def SKXWriteResGroup186 : SchedWriteRes<[SKXPort23,SKXPort015]> { let Latency = 14; @@ -5564,12 +5569,12 @@ def SKXWriteResGroup195 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06 } def: InstRW<[SKXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>; -def SKXWriteResGroup196 : SchedWriteRes<[SKXPort0,SKXPort23]> { +def SKXWriteResGroup196 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { let Latency = 16; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,3]; } -def: InstRW<[SKXWriteResGroup196], (instregex "(V?)DIVSSrm")>; +def: InstRW<[SKXWriteResGroup196], (instregex "(V?)DIVSS(Z?)rm")>; def SKXWriteResGroup198 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { let Latency = 16; @@ -5595,18 +5600,19 @@ def SKXWriteResGroup200 : SchedWriteRes<[SKXPort0156]> { } def: InstRW<[SKXWriteResGroup200], (instregex "VZEROALL")>; -def SKXWriteResGroup201 : SchedWriteRes<[SKXPort0,SKXPort23]> { +def SKXWriteResGroup201 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { let Latency = 17; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,5]; } -def: InstRW<[SKXWriteResGroup201], (instregex "DIVPSrm", - "SQRTSSm", - "VDIVPSZ128rm(b?)", - "VDIVPSrm", - "VDIVSSZrm", - "VSQRTSSm", - "VSQRTSSZm")>; +def: InstRW<[SKXWriteResGroup201], (instregex "(V?)DIVPS(Z128)?rm")>; + +def SKXWriteResGroup201_1 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { + let Latency = 17; + let NumMicroOps = 2; + let ResourceCycles = [1,1,3]; +} +def: InstRW<[SKXWriteResGroup201_1], (instregex "(V?)SQRTSS(Z?)m")>; def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> { let Latency = 17; @@ -5615,30 +5621,34 @@ def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKX } def: InstRW<[SKXWriteResGroup202], (instregex "XCH_F")>; -def SKXWriteResGroup203 : SchedWriteRes<[SKXPort0]> { +def SKXWriteResGroup203 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { let Latency = 18; let NumMicroOps = 1; - let ResourceCycles = [1]; + let ResourceCycles = [1,6]; +} +def: InstRW<[SKXWriteResGroup203], (instregex "(V?)SQRTPD(Z128)?r", + "(V?)SQRTSD(Z?)r")>; + +def SKXWriteResGroup203_1 : SchedWriteRes<[SKXPort0,SKXFPDivider]> { + let Latency = 18; + let NumMicroOps = 1; + let ResourceCycles = [1,12]; } -def: InstRW<[SKXWriteResGroup203], (instregex "SQRTPDr", - "SQRTSDr", - "VSQRTPDYr", - "VSQRTPDZ128r(b?)", - "VSQRTPDZ256r(b?)", - "VSQRTPDr", - "VSQRTSDZr", - "VSQRTSDr")>; +def: InstRW<[SKXWriteResGroup203_1], (instregex "VSQRTPD(Y|Z256)r")>; -def SKXWriteResGroup204 : SchedWriteRes<[SKXPort0,SKXPort23]> { +def SKXWriteResGroup204 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { let Latency = 18; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,5]; +} +def: InstRW<[SKXWriteResGroup204], (instregex "VDIVPS(Y|Z256)rm")>; + +def SKXWriteResGroup204_1 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { + let Latency = 18; + let NumMicroOps = 2; + let ResourceCycles = [1,1,3]; } -def: InstRW<[SKXWriteResGroup204], (instregex "SQRTPSm", - "VDIVPSYrm", - "VDIVPSZ256rm(b?)", - "VSQRTPSZ128m(b?)", - "VSQRTPSm")>; +def: InstRW<[SKXWriteResGroup204_1], (instregex "(V?)SQRTPS(Z128)?m")>; def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort015]> { let Latency = 18; @@ -5661,16 +5671,21 @@ def SKXWriteResGroup208 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06 } def: InstRW<[SKXWriteResGroup208], (instregex "RCR(8|16|32|64)mCL")>; -def SKXWriteResGroup209 : SchedWriteRes<[SKXPort0,SKXPort23]> { +def SKXWriteResGroup209 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { let Latency = 19; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,4]; +} +def: InstRW<[SKXWriteResGroup209], (instregex "(V?)DIVSD(Z?)rm")>; + +def SKXWriteResGroup209_1 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { + let Latency = 19; + let NumMicroOps = 2; + let ResourceCycles = [1,1,6]; } -def: InstRW<[SKXWriteResGroup209], (instregex "DIVSDrm", - "VDIVSDrm", - "VSQRTPSYm", - "VSQRTPSZ256m(b?)")>; +def: InstRW<[SKXWriteResGroup209_1], (instregex "VSQRTPS(Y|Z256)m")>; +//FIXME def SKXWriteResGroup210 : SchedWriteRes<[SKXPort0,SKXPort015]> { let Latency = 19; let NumMicroOps = 3; @@ -5710,14 +5725,12 @@ def: InstRW<[SKXWriteResGroup215], (instregex "DIV_FPrST0", "DIV_FST0r", "DIV_FrST0")>; -def SKXWriteResGroup216 : SchedWriteRes<[SKXPort0,SKXPort23]> { +def SKXWriteResGroup216 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { let Latency = 20; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,4]; } -def: InstRW<[SKXWriteResGroup216], (instregex "(V?)DIVPDrm", - "VDIVPDZ128rm(b?)", - "VDIVSDZrm")>; +def: InstRW<[SKXWriteResGroup216], (instregex "(V?)DIVPD(Z128)?rm")>; def SKXWriteResGroup217 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { let Latency = 20; @@ -5752,13 +5765,12 @@ def SKXWriteResGroup220 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort0156]> { } def: InstRW<[SKXWriteResGroup220], (instregex "MWAITrr")>; -def SKXWriteResGroup222 : SchedWriteRes<[SKXPort0,SKXPort23]> { +def SKXWriteResGroup222 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { let Latency = 21; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,8]; } -def: InstRW<[SKXWriteResGroup222], (instregex "VDIVPDYrm", - "VDIVPDZ256rm(b?)")>; +def: InstRW<[SKXWriteResGroup222], (instregex "VDIVPD(Y|Z256)rm")>; def SKXWriteResGroup223 : SchedWriteRes<[SKXPort0,SKXPort23]> { let Latency = 22; @@ -5828,13 +5840,14 @@ def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> { def: InstRW<[SKXWriteResGroup225], (instregex "VPCONFLICTDZ128rr", "VPCONFLICTQZ256rr")>; -def SKXWriteResGroup226 : SchedWriteRes<[SKXPort0,SKXPort23]> { +def SKXWriteResGroup226 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { let Latency = 23; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,6]; } -def: InstRW<[SKXWriteResGroup226], (instregex "(V?)SQRTSDm")>; +def: InstRW<[SKXWriteResGroup226], (instregex "(V?)SQRTSD(Z?)m")>; +// FIXME def SKXWriteResGroup227 : SchedWriteRes<[SKXPort0,SKXPort015]> { let Latency = 23; let NumMicroOps = 3; @@ -5850,16 +5863,14 @@ def SKXWriteResGroup228 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort23,SK } def: InstRW<[SKXWriteResGroup228], (instregex "CMPXCHG16B")>; -def SKXWriteResGroup229 : SchedWriteRes<[SKXPort0,SKXPort23]> { +def SKXWriteResGroup229 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { let Latency = 24; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,6]; } -def: InstRW<[SKXWriteResGroup229], (instregex "SQRTPDm", - "VSQRTPDZ128m(b?)", - "VSQRTPDm", - "VSQRTSDZm")>; +def: InstRW<[SKXWriteResGroup229], (instregex "(V?)SQRTPD(Z128)?m")>; +//FIXME def SKXWriteResGroup230 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { let Latency = 24; let NumMicroOps = 4; @@ -5867,13 +5878,12 @@ def SKXWriteResGroup230 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { } def: InstRW<[SKXWriteResGroup230], (instregex "VDIVPSZrm(b?)")>; -def SKXWriteResGroup232 : SchedWriteRes<[SKXPort0,SKXPort23]> { +def SKXWriteResGroup232 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> { let Latency = 25; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1,1,12]; } -def: InstRW<[SKXWriteResGroup232], (instregex "VSQRTPDYm", - "VSQRTPDZ256m(b?)")>; +def: InstRW<[SKXWriteResGroup232], (instregex "VSQRTPD(Y|Z256)m")>; def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { let Latency = 25; @@ -5894,6 +5904,7 @@ def: InstRW<[SKXWriteResGroup234], (instrs VGATHERDPDZ256rm, VPGATHERQDZrm, VPGATHERQQZ256rm)>; +// FIXME def SKXWriteResGroup237 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { let Latency = 26; let NumMicroOps = 4; @@ -5949,6 +5960,7 @@ def SKXWriteResGroup243 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI16m", "DIVR_FI32m")>; +// FIXME def SKXWriteResGroup244 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { let Latency = 30; let NumMicroOps = 4; @@ -5964,6 +5976,7 @@ def SKXWriteResGroup245 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort01 def: InstRW<[SKXWriteResGroup245], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>; +// FIXME def SKXWriteResGroup246 : SchedWriteRes<[SKXPort0,SKXPort015]> { let Latency = 31; let NumMicroOps = 3; @@ -6002,6 +6015,7 @@ def SKXWriteResGroup250 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156 } def: InstRW<[SKXWriteResGroup250], (instregex "XRSTOR(64)?")>; +// FIXME def SKXWriteResGroup251 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { let Latency = 38; let NumMicroOps = 4; |

