summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ScheduleBtVer2.td92
1 files changed, 91 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 6849b258f8e..b5dce16b5e3 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -195,7 +195,7 @@ defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>;
defm : X86WriteRes<WriteCMPXCHG, [JALU01], 3, [3], 5>;
defm : X86WriteRes<WriteCMPXCHGRMW, [JALU01, JSAGU, JLAGU], 11, [3, 1, 1], 6>;
-defm : X86WriteRes<WriteXCHG, [JALU01], 1, [1], 1>;
+defm : X86WriteRes<WriteXCHG, [JALU01], 1, [2], 2>;
defm : JWriteResIntPair<WriteIMul8, [JALU1, JMul], 3, [1, 1], 2>;
defm : JWriteResIntPair<WriteIMul16, [JALU1, JMul], 3, [1, 1], 2>;
@@ -395,6 +395,96 @@ def : InstRW<[JWriteLOCK_ALURMWVariant], (instrs INC8m, INC16m, INC32m, INC64m,
NOT8m, NOT16m, NOT32m, NOT64m,
NEG8m, NEG16m, NEG32m, NEG64m)>;
+def JWriteXCHG8rr_XADDrr : SchedWriteRes<[JALU01]> {
+ let Latency = 2;
+ let ResourceCycles = [3];
+ let NumMicroOps = 3;
+}
+def : InstRW<[JWriteXCHG8rr_XADDrr], (instrs XCHG8rr, XADD8rr, XADD16rr,
+ XADD32rr, XADD64rr)>;
+
+// This write defines the latency of the in/out register operand of a non-atomic
+// XADDrm. This is the first of a pair of writes that model non-atomic
+// XADDrm instructions (the second write definition is JWriteXADDrm_LdSt_Part).
+//
+// We need two writes because the instruction latency differs from the output
+// register operand latency. In particular, the first write describes the first
+// (and only) output register operand of the instruction. However, the
+// instruction latency is set to the MAX of all the write latencies. That's why
+// a second write is needed in this case (see example below).
+//
+// Example:
+// XADD %ecx, (%rsp) ## Instruction latency: 11cy
+// ## ECX write Latency: 3cy
+//
+// Register ECX becomes available in 3 cycles. That is because the value of ECX
+// is exchanged with the value read from the stack pointer, and the load-to-use
+// latency is assumed to be 3cy.
+def JWriteXADDrm_XCHG_Part : SchedWriteRes<[JALU01]> {
+ let Latency = 3; // load-to-use latency
+ let ResourceCycles = [3];
+ let NumMicroOps = 3;
+}
+
+// This write defines the latency of the in/out register operand of an atomic
+// XADDrm. This is the first of a sequence of two writes used to model atomic
+// XADD instructions. The second write of the sequence is JWriteXCHGrm_LdSt_Part.
+//
+//
+// Example:
+// LOCK XADD %ecx, (%rsp) ## Instruction Latency: 16cy
+// ## ECX write Latency: 11cy
+//
+// The value of ECX becomes available only after 11cy from the start of
+// execution. This write is used to specifically set that operand latency.
+def JWriteLOCK_XADDrm_XCHG_Part : SchedWriteRes<[JALU01]> {
+ let Latency = 11;
+ let ResourceCycles = [3];
+ let NumMicroOps = 3;
+}
+
+// This write defines the latency of the in/out register operand of an atomic
+// XCHGrm. This write is the first of a sequence of two writes that describe
+// atomic XCHG operations. We need two writes because the instruction latency
+// differs from the output register write latency. We want to make sure that
+// the output register operand becomes visible after 11cy. However, we want to
+// set the instruction latency to 16cy.
+def JWriteXCHGrm_XCHG_Part : SchedWriteRes<[JALU01]> {
+ let Latency = 11;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+
+def JWriteXADDrm_LdSt_Part : SchedWriteRes<[JLAGU, JSAGU]> {
+ let Latency = 11;
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 1;
+}
+
+def JWriteXCHGrm_LdSt_Part : SchedWriteRes<[JLAGU, JSAGU]> {
+ let Latency = 16;
+ let ResourceCycles = [16, 16];
+ let NumMicroOps = 1;
+}
+
+def JWriteXADDrm_Part1 : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<CheckLockPrefix>, [JWriteLOCK_XADDrm_XCHG_Part]>,
+ SchedVar<NoSchedPred, [JWriteXADDrm_XCHG_Part]>
+]>;
+
+def JWriteXADDrm_Part2 : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<CheckLockPrefix>, [JWriteXCHGrm_LdSt_Part]>,
+ SchedVar<NoSchedPred, [JWriteXADDrm_LdSt_Part]>
+]>;
+
+def : InstRW<[JWriteXADDrm_Part1, JWriteXADDrm_Part2, ReadAfterLd],
+ (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm,
+ LXADD8, LXADD16, LXADD32, LXADD64)>;
+
+def : InstRW<[JWriteXCHGrm_XCHG_Part, JWriteXCHGrm_LdSt_Part, ReadAfterLd],
+ (instrs XCHG8rm, XCHG16rm, XCHG32rm, XCHG64rm)>;
+
+
////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////
OpenPOWER on IntegriCloud