diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 92 |
1 files changed, 91 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 6849b258f8e..b5dce16b5e3 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -195,7 +195,7 @@ defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>; defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>; defm : X86WriteRes<WriteCMPXCHG, [JALU01], 3, [3], 5>; defm : X86WriteRes<WriteCMPXCHGRMW, [JALU01, JSAGU, JLAGU], 11, [3, 1, 1], 6>; -defm : X86WriteRes<WriteXCHG, [JALU01], 1, [1], 1>; +defm : X86WriteRes<WriteXCHG, [JALU01], 1, [2], 2>; defm : JWriteResIntPair<WriteIMul8, [JALU1, JMul], 3, [1, 1], 2>; defm : JWriteResIntPair<WriteIMul16, [JALU1, JMul], 3, [1, 1], 2>; @@ -395,6 +395,96 @@ def : InstRW<[JWriteLOCK_ALURMWVariant], (instrs INC8m, INC16m, INC32m, INC64m, NOT8m, NOT16m, NOT32m, NOT64m, NEG8m, NEG16m, NEG32m, NEG64m)>; +def JWriteXCHG8rr_XADDrr : SchedWriteRes<[JALU01]> { + let Latency = 2; + let ResourceCycles = [3]; + let NumMicroOps = 3; +} +def : InstRW<[JWriteXCHG8rr_XADDrr], (instrs XCHG8rr, XADD8rr, XADD16rr, + XADD32rr, XADD64rr)>; + +// This write defines the latency of the in/out register operand of a non-atomic +// XADDrm. This is the first of a pair of writes that model non-atomic +// XADDrm instructions (the second write definition is JWriteXADDrm_LdSt_Part). +// +// We need two writes because the instruction latency differs from the output +// register operand latency. In particular, the first write describes the first +// (and only) output register operand of the instruction. However, the +// instruction latency is set to the MAX of all the write latencies. That's why +// a second write is needed in this case (see example below). +// +// Example: +// XADD %ecx, (%rsp) ## Instruction latency: 11cy +// ## ECX write Latency: 3cy +// +// Register ECX becomes available in 3 cycles. That is because the value of ECX +// is exchanged with the value read from the stack pointer, and the load-to-use +// latency is assumed to be 3cy. +def JWriteXADDrm_XCHG_Part : SchedWriteRes<[JALU01]> { + let Latency = 3; // load-to-use latency + let ResourceCycles = [3]; + let NumMicroOps = 3; +} + +// This write defines the latency of the in/out register operand of an atomic +// XADDrm. This is the first of a sequence of two writes used to model atomic +// XADD instructions. The second write of the sequence is JWriteXCHGrm_LdSt_Part. +// +// +// Example: +// LOCK XADD %ecx, (%rsp) ## Instruction Latency: 16cy +// ## ECX write Latency: 11cy +// +// The value of ECX becomes available only after 11cy from the start of +// execution. This write is used to specifically set that operand latency. +def JWriteLOCK_XADDrm_XCHG_Part : SchedWriteRes<[JALU01]> { + let Latency = 11; + let ResourceCycles = [3]; + let NumMicroOps = 3; +} + +// This write defines the latency of the in/out register operand of an atomic +// XCHGrm. This write is the first of a sequence of two writes that describe +// atomic XCHG operations. We need two writes because the instruction latency +// differs from the output register write latency. We want to make sure that +// the output register operand becomes visible after 11cy. However, we want to +// set the instruction latency to 16cy. +def JWriteXCHGrm_XCHG_Part : SchedWriteRes<[JALU01]> { + let Latency = 11; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} + +def JWriteXADDrm_LdSt_Part : SchedWriteRes<[JLAGU, JSAGU]> { + let Latency = 11; + let ResourceCycles = [1, 1]; + let NumMicroOps = 1; +} + +def JWriteXCHGrm_LdSt_Part : SchedWriteRes<[JLAGU, JSAGU]> { + let Latency = 16; + let ResourceCycles = [16, 16]; + let NumMicroOps = 1; +} + +def JWriteXADDrm_Part1 : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<CheckLockPrefix>, [JWriteLOCK_XADDrm_XCHG_Part]>, + SchedVar<NoSchedPred, [JWriteXADDrm_XCHG_Part]> +]>; + +def JWriteXADDrm_Part2 : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<CheckLockPrefix>, [JWriteXCHGrm_LdSt_Part]>, + SchedVar<NoSchedPred, [JWriteXADDrm_LdSt_Part]> +]>; + +def : InstRW<[JWriteXADDrm_Part1, JWriteXADDrm_Part2, ReadAfterLd], + (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm, + LXADD8, LXADD16, LXADD32, LXADD64)>; + +def : InstRW<[JWriteXCHGrm_XCHG_Part, JWriteXCHGrm_LdSt_Part, ReadAfterLd], + (instrs XCHG8rm, XCHG16rm, XCHG32rm, XCHG64rm)>; + + //////////////////////////////////////////////////////////////////////////////// // Floating point. This covers both scalar and vector operations. //////////////////////////////////////////////////////////////////////////////// |

