diff options
| author | Craig Topper <craig.topper@intel.com> | 2019-03-07 21:22:56 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2019-03-07 21:22:56 +0000 |
| commit | d0c2dba644e27210ed13cd638aa8b8e677ed757d (patch) | |
| tree | 8f6b0772500653e0f099b1c28ac044bebae8b740 /llvm/lib | |
| parent | b3af5d3e57107a3bffe4c2d38b22ae96cee52245 (diff) | |
| download | bcm5719-llvm-d0c2dba644e27210ed13cd638aa8b8e677ed757d.tar.gz bcm5719-llvm-d0c2dba644e27210ed13cd638aa8b8e677ed757d.zip | |
[X86] Correct scheduler information for rotate by constant for Haswell, Broadwell, and Skylake.
Rotate with explicit immediate is a single uop from Haswell on. An immediate of 1 has a dependency on the previous writer of flags, but the other immediate values do not.
The implicit rotate-by-1 instruction is 2 uops. The flags are merged after the rotate uop, so the data result does not see the flag dependency, but I don't think we have any way of modeling that.
RORX is 1 uop without the load and 2 uops with the load. We currently model these with WriteShift/WriteShiftLd.
Differential Revision: https://reviews.llvm.org/D59077
llvm-svn: 355636
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedBroadwell.td | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 10 |
4 files changed, 36 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index fe96f203d3a..6d69cb01eee 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -187,7 +187,7 @@ defm : BWWriteResPair<WritePOPCNT, [BWPort1], 3>; // Integer shifts and rotates. defm : BWWriteResPair<WriteShift, [BWPort06], 1>; defm : BWWriteResPair<WriteShiftCL, [BWPort06,BWPort0156], 3, [2,1], 3>; -defm : BWWriteResPair<WriteRotate, [BWPort06], 2, [2], 2>; +defm : BWWriteResPair<WriteRotate, [BWPort06], 1, [1], 1>; defm : BWWriteResPair<WriteRotateCL, [BWPort06,BWPort0156], 3, [2,1], 3>; // SHLD/SHRD. @@ -1105,6 +1105,14 @@ def BWWriteResGroup87 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> { def: InstRW<[BWWriteResGroup87], (instregex "ROL(8|16|32|64)m(1|i)", "ROR(8|16|32|64)m(1|i)")>; +def BWWriteResGroup87_1 : SchedWriteRes<[BWPort06]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[BWWriteResGroup87_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1, + ROR8r1, ROR16r1, ROR32r1, ROR64r1)>; + def BWWriteResGroup88 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> { let Latency = 7; let NumMicroOps = 5; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 5fc838a2ce3..cb0e56432fb 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -152,7 +152,7 @@ defm : X86WriteRes<WriteXCHG, [HWPort0156], 2, [3], 3>; // Integer shifts and rotates. defm : HWWriteResPair<WriteShift, [HWPort06], 1>; defm : HWWriteResPair<WriteShiftCL, [HWPort06, HWPort0156], 3, [2,1], 3>; -defm : HWWriteResPair<WriteRotate, [HWPort06], 2, [2], 2>; +defm : HWWriteResPair<WriteRotate, [HWPort06], 1, [1], 1>; defm : HWWriteResPair<WriteRotateCL, [HWPort06, HWPort0156], 3, [2,1], 3>; // SHLD/SHRD. 
@@ -1183,6 +1183,14 @@ def HWWriteResGroup46 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06]> { def: InstRW<[HWWriteResGroup46], (instregex "ROL(8|16|32|64)m(1|i)", "ROR(8|16|32|64)m(1|i)")>; +def HWWriteResGroup46_1 : SchedWriteRes<[HWPort06]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[HWWriteResGroup46_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1, + ROR8r1, ROR16r1, ROR32r1, ROR64r1)>; + def HWWriteResGroup47 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> { let Latency = 8; let NumMicroOps = 5; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 9e5f7958460..ff6a0e35c5c 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -184,7 +184,7 @@ defm : SKLWriteResPair<WritePOPCNT, [SKLPort1], 3>; // Integer shifts and rotates. defm : SKLWriteResPair<WriteShift, [SKLPort06], 1>; defm : SKLWriteResPair<WriteShiftCL, [SKLPort06], 3, [3], 3>; -defm : SKLWriteResPair<WriteRotate, [SKLPort06], 2, [2], 2>; +defm : SKLWriteResPair<WriteRotate, [SKLPort06], 1, [1], 1>; defm : SKLWriteResPair<WriteRotateCL, [SKLPort06], 3, [3], 3>; // SHLD/SHRD. 
@@ -1185,6 +1185,14 @@ def SKLWriteResGroup100 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06 def: InstRW<[SKLWriteResGroup100], (instregex "ROL(8|16|32|64)m(1|i)", "ROR(8|16|32|64)m(1|i)")>; +def SKLWriteResGroup100_1 : SchedWriteRes<[SKLPort06]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SKLWriteResGroup100_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1, + ROR8r1, ROR16r1, ROR32r1, ROR64r1)>; + def SKLWriteResGroup101 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> { let Latency = 7; let NumMicroOps = 5; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 4747dafb857..0fdeba7cef3 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -177,7 +177,7 @@ defm : X86WriteRes<WriteBitTestSetRegLd, [SKXPort0156,SKXPort23], 5, [1,1], 2>; // Integer shifts and rotates. defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>; defm : SKXWriteResPair<WriteShiftCL, [SKXPort06], 3, [3], 3>; -defm : SKXWriteResPair<WriteRotate, [SKXPort06], 2, [2], 2>; +defm : SKXWriteResPair<WriteRotate, [SKXPort06], 1, [1], 1>; defm : SKXWriteResPair<WriteRotateCL, [SKXPort06], 3, [3], 3>; // SHLD/SHRD. @@ -1448,6 +1448,14 @@ def SKXWriteResGroup107 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06 def: InstRW<[SKXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)", "ROR(8|16|32|64)m(1|i)")>; +def SKXWriteResGroup107_1 : SchedWriteRes<[SKXPort06]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SKXWriteResGroup107_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1, + ROR8r1, ROR16r1, ROR32r1, ROR64r1)>; + def SKXWriteResGroup108 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> { let Latency = 7; let NumMicroOps = 5; |

