diff options
author | Andrew V. Tischenko <andrew.v.tischenko@gmail.com> | 2018-08-30 06:26:00 +0000 |
---|---|---|
committer | Andrew V. Tischenko <andrew.v.tischenko@gmail.com> | 2018-08-30 06:26:00 +0000 |
commit | 62f7a3207b2f1a78afc9dbbfe20f4327f7283020 (patch) | |
tree | a460edc584f0001dbb8365d21bc6266a4f72b60a | |
parent | f0531da109e5b9963327df10bcbe94b9e6160665 (diff) | |
download | bcm5719-llvm-62f7a3207b2f1a78afc9dbbfe20f4327f7283020.tar.gz bcm5719-llvm-62f7a3207b2f1a78afc9dbbfe20f4327f7283020.zip |
[X86] Improved sched model for X86 CMPXCHG* instructions.
Differential Revision: https://reviews.llvm.org/D50070
llvm-svn: 341024
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 4 | ||||
-rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 12 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 12 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 21 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 10 | ||||
-rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 10 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleAtom.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/schedule-x86_64.ll | 16 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/Generic/resources-x86_64.s | 18 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x86_64.s | 20 |
14 files changed, 60 insertions, 90 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index bbc4e2bdd68..b401b6b879e 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -2061,7 +2061,7 @@ def XADD64rm : RI<0xC1, MRMSrcMem, (outs GR64:$dst), } -let SchedRW = [WriteALU], hasSideEffects = 0 in { +let SchedRW = [WriteCMPXCHG], hasSideEffects = 0 in { let Defs = [AL, EFLAGS], Uses = [AL] in def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB, @@ -2080,7 +2080,7 @@ def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), NotMemoryFoldable; } // SchedRW, hasSideEffects -let SchedRW = [WriteALULd, WriteRMW], mayLoad = 1, mayStore = 1, +let SchedRW = [WriteCMPXCHGRMW], mayLoad = 1, mayStore = 1, hasSideEffects = 0 in { let Defs = [AL, EFLAGS], Uses = [AL] in def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 543fabf3cfa..ad4a10a2e15 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -119,6 +119,8 @@ defm : BWWriteResPair<WriteIDiv16, [BWPort0, BWDivider], 25, [1, 10]>; defm : BWWriteResPair<WriteIDiv32, [BWPort0, BWDivider], 25, [1, 10]>; defm : BWWriteResPair<WriteIDiv64, [BWPort0, BWDivider], 25, [1, 10]>; +defm : X86WriteRes<WriteCMPXCHG,[BWPort06, BWPort0156], 5, [2, 3], 5>; +defm : X86WriteRes<WriteCMPXCHGRMW,[BWPort23, BWPort06, BWPort0156, BWPort237, BWPort4], 8, [1, 2, 1, 1, 1], 6>; defm : X86WriteRes<WriteBSWAP32, [BWPort15], 1, [1], 1>; defm : X86WriteRes<WriteBSWAP64, [BWPort06, BWPort15], 2, [1, 1], 2>; defm : X86WriteRes<WriteXCHG, [BWPort0156], 2, [3], 3>; @@ -939,13 +941,6 @@ def BWWriteResGroup55 : SchedWriteRes<[BWPort06,BWPort0156]> { } def: InstRW<[BWWriteResGroup55], (instrs XSETBV)>; -def BWWriteResGroup56 : SchedWriteRes<[BWPort06,BWPort0156]> { - let Latency = 5; - let NumMicroOps = 5; - let ResourceCycles = [2,3]; -} -def: InstRW<[BWWriteResGroup56], (instregex "CMPXCHG(8|16|32|64)rr")>; - def BWWriteResGroup57 : SchedWriteRes<[BWPort4,BWPort237,BWPort0156]> { let Latency = 5; let NumMicroOps = 6; @@ -1214,8 +1209,7 @@ def BWWriteResGroup100 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPo let ResourceCycles = [1,1,1,2,1]; } def : SchedAlias<WriteADCRMW, BWWriteResGroup100>; -def: InstRW<[BWWriteResGroup100], (instregex "CMPXCHG(8|16|32|64)rm", - "ROL(8|16|32|64)mCL", +def: InstRW<[BWWriteResGroup100], (instregex "ROL(8|16|32|64)mCL", "SAR(8|16|32|64)mCL", "SHL(8|16|32|64)mCL", "SHR(8|16|32|64)mCL")>; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 41a26e726b5..916bb309d32 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -126,6 +126,8 @@ defm : HWWriteResPair<WriteIMul64, [HWPort1], 3>; defm : X86WriteRes<WriteBSWAP32, [HWPort15], 1, [1], 1>; defm : X86WriteRes<WriteBSWAP64, [HWPort06, HWPort15], 2, [1,1], 2>; +defm : X86WriteRes<WriteCMPXCHG,[HWPort06, HWPort0156], 5, [2,3], 5>; +defm : X86WriteRes<WriteCMPXCHGRMW,[HWPort23,HWPort06,HWPort0156,HWPort237,HWPort4], 9, [1,2,1,1,1], 6>; defm : X86WriteRes<WriteXCHG, [HWPort0156], 2, [3], 3>; def : WriteRes<WriteIMulH, []> { let Latency = 3; } @@ -1349,8 +1351,7 @@ def HWWriteResGroup69 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06,HWPor let NumMicroOps = 6; let ResourceCycles = [1,1,1,2,1]; } -def: InstRW<[HWWriteResGroup69], (instregex "CMPXCHG(8|16|32|64)rm", - "ROL(8|16|32|64)mCL", +def: InstRW<[HWWriteResGroup69], (instregex "ROL(8|16|32|64)mCL", "SAR(8|16|32|64)mCL", "SHL(8|16|32|64)mCL", "SHR(8|16|32|64)mCL")>; @@ -1578,13 +1579,6 @@ def HWWriteResGroup100 : SchedWriteRes<[HWPort06,HWPort0156]> { } def: InstRW<[HWWriteResGroup100], (instrs XSETBV)>; -def HWWriteResGroup101 : SchedWriteRes<[HWPort06,HWPort0156]> { - let Latency = 5; - let NumMicroOps = 5; - let ResourceCycles = [2,3]; -} -def: InstRW<[HWWriteResGroup101], (instregex "CMPXCHG(8|16|32|64)rr")>; - def HWWriteResGroup102 : SchedWriteRes<[HWPort1,HWPort5]> { let Latency = 6; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 0030812e61e..4585110711f 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -114,7 +114,9 @@ defm : SBWriteResPair<WriteIMul64, [SBPort1], 3>; defm : X86WriteRes<WriteXCHG, [SBPort015], 2, [3], 3>; defm : X86WriteRes<WriteBSWAP32, [SBPort1], 1, [1], 1>; -defm : X86WriteRes<WriteBSWAP64, [SBPort1,SBPort05], 2, [1,1], 2>; +defm : X86WriteRes<WriteBSWAP64, [SBPort1, SBPort05], 2, [1,1], 2>; +defm : X86WriteRes<WriteCMPXCHG, [SBPort05, SBPort015], 5, [1,3], 4>; +defm : X86WriteRes<WriteCMPXCHGRMW,[SBPort015, SBPort5, SBPort23, SBPort4], 8, [1, 2, 2, 1], 6>; defm : SBWriteResPair<WriteDiv8, [SBPort0, SBDivider], 25, [1, 10]>; defm : SBWriteResPair<WriteDiv16, [SBPort0, SBDivider], 25, [1, 10]>; @@ -770,13 +772,6 @@ def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> { } def: InstRW<[SBWriteResGroup41], (instrs FNINIT)>; -def SBWriteResGroup42 : SchedWriteRes<[SBPort05,SBPort015]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; -} -def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG(8|16|32|64)rr")>; - def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> { let Latency = 3; let NumMicroOps = 4; @@ -953,12 +948,12 @@ def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { } def: InstRW<[SBWriteResGroup77], (instregex "(V?)(U?)COMI(SD|SS)rm")>; -def SBWriteResGroup81 : SchedWriteRes<[SBPort23,SBPort015]> { - let Latency = 8; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; +def SBWriteResGroup81 : SchedWriteRes<[SBPort4, SBPort23, SBPort015]> { + let Latency = 6; + let NumMicroOps = 3; + let ResourceCycles = [1, 2, 1]; } -def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG(8|16|32|64)rm")>; +def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG(8|16)B")>; def SBWriteResGroup83 : SchedWriteRes<[SBPort23,SBPort015]> { let Latency = 8; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 378934b82b8..04ec9c4a220 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -112,6 +112,8 @@ defm : SKLWriteResPair<WriteIMul64, [SKLPort1], 3>; // Integer 64-bit multipl defm : X86WriteRes<WriteBSWAP32, [SKLPort15], 1, [1], 1>; defm : X86WriteRes<WriteBSWAP64, [SKLPort06, SKLPort15], 2, [1,1], 2>; +defm : X86WriteRes<WriteCMPXCHG,[SKLPort06, SKLPort0156], 5, [2,3], 5>; +defm : X86WriteRes<WriteCMPXCHGRMW,[SKLPort23,SKLPort06,SKLPort0156,SKLPort237,SKLPort4], 8, [1,2,1,1,1], 6>; defm : X86WriteRes<WriteXCHG, [SKLPort0156], 2, [3], 3>; defm : SKLWriteResPair<WriteDiv8, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>; @@ -965,13 +967,6 @@ def SKLWriteResGroup63 : SchedWriteRes<[SKLPort06,SKLPort0156]> { } def: InstRW<[SKLWriteResGroup63], (instrs XSETBV)>; -def SKLWriteResGroup64 : SchedWriteRes<[SKLPort06,SKLPort0156]> { - let Latency = 5; - let NumMicroOps = 5; - let ResourceCycles = [2,3]; -} -def: InstRW<[SKLWriteResGroup64], (instregex "CMPXCHG(8|16|32|64)rr")>; - def SKLWriteResGroup65 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> { let Latency = 5; let NumMicroOps = 6; @@ -1311,7 +1306,6 @@ def SKLWriteResGroup119 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06 let ResourceCycles = [1,1,1,2,1]; } def: SchedAlias<WriteADCRMW, SKLWriteResGroup119>; -def: InstRW<[SKLWriteResGroup119], (instregex "CMPXCHG(8|16|32|64)rm")>; def SKLWriteResGroup120 : SchedWriteRes<[SKLPort0,SKLPort23]> { let Latency = 9; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index d803b5d4349..3221709f9a9 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -112,6 +112,8 @@ defm : SKXWriteResPair<WriteIMul64, [SKXPort1], 3>; // Integer 64-bit multipl defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>; defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1,1], 2>; +defm : X86WriteRes<WriteCMPXCHG,[SKXPort06, SKXPort0156], 5, [2,3], 5>; +defm : X86WriteRes<WriteCMPXCHGRMW,[SKXPort23,SKXPort06,SKXPort0156,SKXPort237,SKXPort4], 8, [1,2,1,1,1], 6>; defm : X86WriteRes<WriteXCHG, [SKXPort0156], 2, [3], 3>; defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>; @@ -1126,13 +1128,6 @@ def SKXWriteResGroup67 : SchedWriteRes<[SKXPort06,SKXPort0156]> { } def: InstRW<[SKXWriteResGroup67], (instrs XSETBV)>; -def SKXWriteResGroup68 : SchedWriteRes<[SKXPort06,SKXPort0156]> { - let Latency = 5; - let NumMicroOps = 5; - let ResourceCycles = [2,3]; -} -def: InstRW<[SKXWriteResGroup68], (instregex "CMPXCHG(8|16|32|64)rr")>; - def SKXWriteResGroup69 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> { let Latency = 5; let NumMicroOps = 6; @@ -1664,7 +1659,6 @@ def SKXWriteResGroup130 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06 let ResourceCycles = [1,1,1,2,1]; } def: SchedAlias<WriteADCRMW, SKXWriteResGroup130>; -def: InstRW<[SKXWriteResGroup130], (instregex "CMPXCHG(8|16|32|64)rm")>; def SKXWriteResGroup131 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { let Latency = 8; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 4f123a96416..20fa4bd03ef 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -120,7 +120,9 @@ def WriteLEA : SchedWrite; // LEA instructions can't fold loads. def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap. def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap. -def WriteXCHG : SchedWrite; // Compare+Exchange - TODO RMW support. +defm WriteCMPXCHG : X86SchedWritePair; // Compare and set, compare and swap. +def WriteCMPXCHGRMW : SchedWrite; // Compare and set, compare and swap. +def WriteXCHG : SchedWrite; // Compare+Exchange - TODO RMW support. // Integer division. defm WriteDiv8 : X86SchedWritePair; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 88321b7454e..5c37a1589a7 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -84,6 +84,8 @@ defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [ defm : X86WriteRes<WriteXCHG, [AtomPort01], 2, [2], 1>; defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>; defm : X86WriteRes<WriteBSWAP64, [AtomPort0], 1, [1], 1>; +defm : AtomWriteResPair<WriteCMPXCHG, [AtomPort01], [AtomPort01], 15, 15, [15]>; +defm : X86WriteRes<WriteCMPXCHGRMW, [AtomPort01, AtomPort0], 1, [1, 1], 1>; defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>; defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>; @@ -676,12 +678,6 @@ def AtomWrite01_14 : SchedWriteRes<[AtomPort01]> { } def : InstRW<[AtomWrite01_14], (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>; -def AtomWrite01_15 : SchedWriteRes<[AtomPort01]> { - let Latency = 15; - let ResourceCycles = [15]; -} -def : InstRW<[AtomWrite01_15], (instrs CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr)>; - def AtomWrite01_17 : SchedWriteRes<[AtomPort01]> { let Latency = 17; let ResourceCycles = [17]; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index b64f3b6d0ca..e80b09930e1 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -170,6 +170,8 @@ defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>; defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>; defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>; +defm : X86WriteRes<WriteCMPXCHG,[JALU01], 1, [1], 1>; +defm : X86WriteRes<WriteCMPXCHGRMW,[JALU01, JSAGU, JLAGU], 4, [1, 1, 1], 2>; defm : X86WriteRes<WriteXCHG, [JALU01], 1, [1], 1>; defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 8af32f5b1eb..cfede8c7359 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -100,7 +100,9 @@ defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>; defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>; defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>; -defm : X86WriteRes<WriteXCHG, [SLM_IEC_RSV01], 1, [1], 1>; +defm : X86WriteRes<WriteCMPXCHG, [SLM_IEC_RSV01], 1, [1], 1>; +defm : X86WriteRes<WriteCMPXCHGRMW, [SLM_IEC_RSV01, SLM_MEC_RSV], 4, [1, 2], 2>; +defm : X86WriteRes<WriteXCHG, [SLM_IEC_RSV01], 1, [1], 1>; defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index dc3fa688825..076e37359cf 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -182,6 +182,8 @@ defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>; defm : X86WriteRes<WriteBSWAP32, [ZnALU], 1, [4], 1>; defm : X86WriteRes<WriteBSWAP64, [ZnALU], 1, [4], 1>; +defm : X86WriteRes<WriteCMPXCHG, [ZnALU], 1, [1], 1>; +defm : X86WriteRes<WriteCMPXCHGRMW,[ZnALU,ZnAGU], 8, [1,1], 5>; defm : X86WriteRes<WriteXCHG, [ZnALU], 1, [2], 2>; defm : ZnWriteResPair<WriteShift, [ZnALU], 1>; @@ -743,13 +745,6 @@ def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)rrCL")>; def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)mrCL")>; //-- Misc instructions --// -// CMPXCHG. -def ZnWriteCMPXCHG : SchedWriteRes<[ZnAGU, ZnALU]> { - let Latency = 8; - let NumMicroOps = 5; -} -def : InstRW<[ZnWriteCMPXCHG], (instregex "CMPXCHG(8|16|32|64)rm")>; - // CMPXCHG8B. def ZnWriteCMPXCHG8B : SchedWriteRes<[ZnAGU, ZnALU]> { let NumMicroOps = 18; diff --git a/llvm/test/CodeGen/X86/schedule-x86_64.ll b/llvm/test/CodeGen/X86/schedule-x86_64.ll index 06c586cbf34..3e48097e52b 100644 --- a/llvm/test/CodeGen/X86/schedule-x86_64.ll +++ b/llvm/test/CodeGen/X86/schedule-x86_64.ll @@ -4307,7 +4307,7 @@ define void @test_cmpxchg_8(i8 %a0, i8 %a1, i8 *%a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: cmpxchgb %dil, %sil # sched: [5:1.33] -; GENERIC-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:1.00] +; GENERIC-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:2.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4331,7 +4331,7 @@ define void @test_cmpxchg_8(i8 %a0, i8 %a1, i8 *%a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: #APP ; SANDY-NEXT: cmpxchgb %dil, %sil # sched: [5:1.33] -; SANDY-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:1.00] +; SANDY-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:2.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -4390,7 +4390,7 @@ define void @test_cmpxchg_16(i16 %a0, i16 %a1, i16 *%a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: cmpxchgw %di, %si # sched: [5:1.33] -; GENERIC-NEXT: cmpxchgw %di, (%rdx) # sched: [8:1.00] +; GENERIC-NEXT: cmpxchgw %di, (%rdx) # sched: [8:2.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4414,7 +4414,7 @@ define void @test_cmpxchg_16(i16 %a0, i16 %a1, i16 *%a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: #APP ; SANDY-NEXT: cmpxchgw %di, %si # sched: [5:1.33] -; SANDY-NEXT: cmpxchgw %di, (%rdx) # sched: [8:1.00] +; SANDY-NEXT: cmpxchgw %di, (%rdx) # sched: [8:2.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -4473,7 +4473,7 @@ define void @test_cmpxchg_32(i32 %a0, i32 %a1, i32 *%a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: cmpxchgl %edi, %esi # sched: [5:1.33] -; GENERIC-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:1.00] +; GENERIC-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:2.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4497,7 +4497,7 @@ define void @test_cmpxchg_32(i32 %a0, i32 %a1, i32 *%a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: #APP ; SANDY-NEXT: cmpxchgl %edi, %esi # sched: [5:1.33] -; SANDY-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:1.00] +; SANDY-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:2.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -4556,7 +4556,7 @@ define void @test_cmpxchg_64(i64 %a0, i64 %a1, i64 *%a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP ; GENERIC-NEXT: cmpxchgq %rdi, %rsi # sched: [5:1.33] -; GENERIC-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:1.00] +; GENERIC-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:2.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4580,7 +4580,7 @@ define void @test_cmpxchg_64(i64 %a0, i64 %a1, i64 *%a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: #APP ; SANDY-NEXT: cmpxchgq %rdi, %rsi # sched: [5:1.33] -; SANDY-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:1.00] +; SANDY-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:2.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-x86_64.s index 3e671410b5e..ed66bc6480d 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-x86_64.s @@ -1057,13 +1057,13 @@ xorq (%rax), %rdi # CHECK-NEXT: 5 8 1.00 U cmpsl %es:(%rdi), (%rsi) # CHECK-NEXT: 5 8 1.00 U cmpsq %es:(%rdi), (%rsi) # CHECK-NEXT: 4 5 1.33 cmpxchgb %cl, %bl -# CHECK-NEXT: 4 8 1.00 * * cmpxchgb %cl, (%rbx) +# CHECK-NEXT: 6 8 2.00 * * cmpxchgb %cl, (%rbx) # CHECK-NEXT: 4 5 1.33 cmpxchgw %cx, %bx -# CHECK-NEXT: 4 8 1.00 * * cmpxchgw %cx, (%rbx) +# CHECK-NEXT: 6 8 2.00 * * cmpxchgw %cx, (%rbx) # CHECK-NEXT: 4 5 1.33 cmpxchgl %ecx, %ebx -# CHECK-NEXT: 4 8 1.00 * * cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: 6 8 2.00 * * cmpxchgl %ecx, (%rbx) # CHECK-NEXT: 4 5 1.33 cmpxchgq %rcx, %rbx -# CHECK-NEXT: 4 8 1.00 * * cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: 6 8 2.00 * * cmpxchgq %rcx, (%rbx) # CHECK-NEXT: 1 100 0.33 U cpuid # CHECK-NEXT: 1 1 0.33 decb %dil # CHECK-NEXT: 3 7 1.00 * * decb (%rax) @@ -1620,7 +1620,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: 160.00 - 438.50 224.00 242.00 430.50 290.00 290.00 +# CHECK-NEXT: 160.00 - 435.83 221.33 246.00 435.83 292.00 292.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -1820,13 +1820,13 @@ xorq (%rax), %rdi # CHECK-NEXT: - - 1.00 1.00 - 1.00 1.00 1.00 cmpsl %es:(%rdi), (%rsi) # CHECK-NEXT: - - 1.00 1.00 - 1.00 1.00 1.00 cmpsq %es:(%rdi), (%rsi) # CHECK-NEXT: - - 1.50 1.00 - 1.50 - - cmpxchgb %cl, %bl -# CHECK-NEXT: - - 1.00 1.00 - 1.00 0.50 0.50 cmpxchgb %cl, (%rbx) +# CHECK-NEXT: - - 0.33 0.33 1.00 2.33 1.00 1.00 cmpxchgb %cl, (%rbx) # CHECK-NEXT: - - 1.50 1.00 - 1.50 - - cmpxchgw %cx, %bx -# CHECK-NEXT: - - 1.00 1.00 - 1.00 0.50 0.50 cmpxchgw %cx, (%rbx) +# CHECK-NEXT: - - 0.33 0.33 1.00 2.33 1.00 1.00 cmpxchgw %cx, (%rbx) # CHECK-NEXT: - - 1.50 1.00 - 1.50 - - cmpxchgl %ecx, %ebx -# CHECK-NEXT: - - 1.00 1.00 - 1.00 0.50 0.50 cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: - - 0.33 0.33 1.00 2.33 1.00 1.00 cmpxchgl %ecx, (%rbx) # CHECK-NEXT: - - 1.50 1.00 - 1.50 - - cmpxchgq %rcx, %rbx -# CHECK-NEXT: - - 1.00 1.00 - 1.00 0.50 0.50 cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: - - 0.33 0.33 1.00 2.33 1.00 1.00 cmpxchgq %rcx, (%rbx) # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cpuid # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - decb %dil # CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 decb (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x86_64.s index d3184bd4e54..6fb5e831557 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-x86_64.s @@ -852,6 +852,8 @@ xorq %rsi, %rdi xorq %rsi, (%rax) xorq (%rax), %rdi +# CCHECK-NEXT: 160.00 - 439.83 221.33 250.00 431.83 294.00 294.00 + # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps # CHECK-NEXT: [2]: Latency @@ -1057,13 +1059,13 @@ xorq (%rax), %rdi # CHECK-NEXT: 5 8 1.00 U cmpsl %es:(%rdi), (%rsi) # CHECK-NEXT: 5 8 1.00 U cmpsq %es:(%rdi), (%rsi) # CHECK-NEXT: 4 5 1.33 cmpxchgb %cl, %bl -# CHECK-NEXT: 4 8 1.00 * * cmpxchgb %cl, (%rbx) +# CHECK-NEXT: 6 8 2.00 * * cmpxchgb %cl, (%rbx) # CHECK-NEXT: 4 5 1.33 cmpxchgw %cx, %bx -# CHECK-NEXT: 4 8 1.00 * * cmpxchgw %cx, (%rbx) +# CHECK-NEXT: 6 8 2.00 * * cmpxchgw %cx, (%rbx) # CHECK-NEXT: 4 5 1.33 cmpxchgl %ecx, %ebx -# CHECK-NEXT: 4 8 1.00 * * cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: 6 8 2.00 * * cmpxchgl %ecx, (%rbx) # CHECK-NEXT: 4 5 1.33 cmpxchgq %rcx, %rbx -# CHECK-NEXT: 4 8 1.00 * * cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: 6 8 2.00 * * cmpxchgq %rcx, (%rbx) # CHECK-NEXT: 1 100 0.33 U cpuid # CHECK-NEXT: 1 1 0.33 decb %dil # CHECK-NEXT: 3 7 1.00 * * decb (%rax) @@ -1620,7 +1622,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: 160.00 - 438.50 224.00 242.00 430.50 290.00 290.00 +# CHECK-NEXT: 160.00 - 435.83 221.33 246.00 435.83 292.00 292.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -1820,13 +1822,13 @@ xorq (%rax), %rdi # CHECK-NEXT: - - 1.00 1.00 - 1.00 1.00 1.00 cmpsl %es:(%rdi), (%rsi) # CHECK-NEXT: - - 1.00 1.00 - 1.00 1.00 1.00 cmpsq %es:(%rdi), (%rsi) # CHECK-NEXT: - - 1.50 1.00 - 1.50 - - cmpxchgb %cl, %bl -# CHECK-NEXT: - - 1.00 1.00 - 1.00 0.50 0.50 cmpxchgb %cl, (%rbx) +# CHECK-NEXT: - - 0.33 0.33 1.00 2.33 1.00 1.00 cmpxchgb %cl, (%rbx) # CHECK-NEXT: - - 1.50 1.00 - 1.50 - - cmpxchgw %cx, %bx -# CHECK-NEXT: - - 1.00 1.00 - 1.00 0.50 0.50 cmpxchgw %cx, (%rbx) +# CHECK-NEXT: - - 0.33 0.33 1.00 2.33 1.00 1.00 cmpxchgw %cx, (%rbx) # CHECK-NEXT: - - 1.50 1.00 - 1.50 - - cmpxchgl %ecx, %ebx -# CHECK-NEXT: - - 1.00 1.00 - 1.00 0.50 0.50 cmpxchgl %ecx, (%rbx) +# CHECK-NEXT: - - 0.33 0.33 1.00 2.33 1.00 1.00 cmpxchgl %ecx, (%rbx) # CHECK-NEXT: - - 1.50 1.00 - 1.50 - - cmpxchgq %rcx, %rbx -# CHECK-NEXT: - - 1.00 1.00 - 1.00 0.50 0.50 cmpxchgq %rcx, (%rbx) +# CHECK-NEXT: - - 0.33 0.33 1.00 2.33 1.00 1.00 cmpxchgq %rcx, (%rbx) # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - cpuid # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - decb %dil # CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 decb (%rax) |