1 files changed, 91 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 6849b258f8e..b5dce16b5e3 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -195,7 +195,7 @@ defm : X86WriteRes<WriteBSWAP32,     [JALU01], 1, [1], 1>;
 defm : X86WriteRes<WriteBSWAP64,     [JALU01], 1, [1], 1>;
 defm : X86WriteRes<WriteCMPXCHG,     [JALU01], 3, [3], 5>;
 defm : X86WriteRes<WriteCMPXCHGRMW,  [JALU01, JSAGU, JLAGU], 11, [3, 1, 1], 6>;
-defm : X86WriteRes<WriteXCHG,        [JALU01], 1, [1], 1>;
+defm : X86WriteRes<WriteXCHG,        [JALU01], 1, [2], 2>;
 
 defm : JWriteResIntPair<WriteIMul8,     [JALU1, JMul], 3, [1, 1], 2>;
 defm : JWriteResIntPair<WriteIMul16,    [JALU1, JMul], 3, [1, 1], 2>;
@@ -395,6 +395,96 @@ def : InstRW<[JWriteLOCK_ALURMWVariant], (instrs INC8m, INC16m, INC32m, INC64m,
                                                  NOT8m, NOT16m, NOT32m, NOT64m,
                                                  NEG8m, NEG16m, NEG32m, NEG64m)>;
 
+def JWriteXCHG8rr_XADDrr : SchedWriteRes<[JALU01]> {
+  let Latency = 2;
+  let ResourceCycles = [3];
+  let NumMicroOps = 3;
+}
+def : InstRW<[JWriteXCHG8rr_XADDrr], (instrs XCHG8rr, XADD8rr, XADD16rr,
+                                                      XADD32rr, XADD64rr)>;
+
+// This write defines the latency of the in/out register operand of a non-atomic
+// XADDrm. This is the first of a pair of writes that model non-atomic
+// XADDrm instructions (the second write definition is JWriteXADDrm_LdSt_Part).
+//
+// We need two writes because the instruction latency differs from the output
+// register operand latency. In particular, the first write describes the first
+// (and only) output register operand of the instruction.  However, the
+// instruction latency is set to the MAX of all the write latencies. That's why
+// a second write is needed in this case (see example below).
+//
+// Example:
+//     XADD %ecx, (%rsp)      ## Instruction latency: 11cy
+//                            ## ECX write Latency: 3cy
+//
+// Register ECX becomes available in 3 cycles. That is because the value of ECX
+// is exchanged with the value read from the stack pointer, and the load-to-use
+// latency is assumed to be 3cy.
+def JWriteXADDrm_XCHG_Part : SchedWriteRes<[JALU01]> {
+  let Latency = 3;  // load-to-use latency
+  let ResourceCycles = [3];
+  let NumMicroOps = 3;
+}
+
+// This write defines the latency of the in/out register operand of an atomic
+// XADDrm. This is the first of a sequence of two writes used to model atomic
+// XADD instructions. The second write of the sequence is JWriteXCHGrm_LdSt_Part.
+//
+//
+// Example:
+//    LOCK XADD %ecx, (%rsp)     ## Instruction Latency: 16cy
+//                               ## ECX write Latency: 11cy
+//
+// The value of ECX becomes available only after 11cy from the start of
+// execution. This write is used to specifically set that operand latency. 
+def JWriteLOCK_XADDrm_XCHG_Part : SchedWriteRes<[JALU01]> {
+  let Latency = 11;
+  let ResourceCycles = [3];
+  let NumMicroOps = 3;
+}
+
+// This write defines the latency of the in/out register operand of an atomic
+// XCHGrm. This write is the first of a sequence of two writes that describe
+// atomic XCHG operations. We need two writes because the instruction latency
+// differs from the output register write latency.  We want to make sure that
+// the output register operand becomes visible after 11cy. However, we want to
+// set the instruction latency to 16cy.
+def JWriteXCHGrm_XCHG_Part : SchedWriteRes<[JALU01]> {
+  let Latency = 11;
+  let ResourceCycles = [2];
+  let NumMicroOps = 2;
+}
+
+def JWriteXADDrm_LdSt_Part : SchedWriteRes<[JLAGU, JSAGU]> {
+  let Latency = 11;
+  let ResourceCycles = [1, 1];
+  let NumMicroOps = 1;
+}
+
+def JWriteXCHGrm_LdSt_Part : SchedWriteRes<[JLAGU, JSAGU]> {
+  let Latency = 16;
+  let ResourceCycles = [16, 16];
+  let NumMicroOps = 1;
+}
+
+def JWriteXADDrm_Part1 : SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<CheckLockPrefix>, [JWriteLOCK_XADDrm_XCHG_Part]>,
+  SchedVar<NoSchedPred,                       [JWriteXADDrm_XCHG_Part]>
+]>;
+
+def JWriteXADDrm_Part2 : SchedWriteVariant<[
+  SchedVar<MCSchedPredicate<CheckLockPrefix>, [JWriteXCHGrm_LdSt_Part]>,
+  SchedVar<NoSchedPred,                       [JWriteXADDrm_LdSt_Part]>
+]>;
+
+def : InstRW<[JWriteXADDrm_Part1, JWriteXADDrm_Part2, ReadAfterLd],
+                 (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm,
+                         LXADD8, LXADD16, LXADD32, LXADD64)>;
+
+def : InstRW<[JWriteXCHGrm_XCHG_Part, JWriteXCHGrm_LdSt_Part, ReadAfterLd],
+                 (instrs XCHG8rm, XCHG16rm, XCHG32rm, XCHG64rm)>;
+
+
 ////////////////////////////////////////////////////////////////////////////////
 // Floating point. This covers both scalar and vector operations.
 ////////////////////////////////////////////////////////////////////////////////