| author | Philip Reames <listmail@philipreames.com> | 2019-05-14 22:32:42 +0000 |
|---|---|---|
| committer | Philip Reames <listmail@philipreames.com> | 2019-05-14 22:32:42 +0000 |
| commit | 445f942fc49821b2c0964f595c9d9fc8f9177308 (patch) | |
| tree | 11c08a47af761b2ebb900405a6270ca6139abb37 /llvm/lib | |
| parent | f3011b9b106b2d0961d5a825823e8fcc711e0abe (diff) | |
Use an offset from TOS for idempotent rmw locked op lowering
This was the portion split off from D58632 so that it could follow the redzone API cleanup. Note that I changed the preferred offset from -8 to -64. The difference should be very minor, but I thought it might help address one concern which had been previously raised.
Differential Revision: https://reviews.llvm.org/D61862
llvm-svn: 360719
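For context, here is a minimal C++ sketch (not part of the commit; names are illustrative) of the kind of source construct that can reach this lowering path on x86-64: a sequentially consistent fence or an idempotent atomic RMW may be emitted as a locked read-modify-write of a stack slot instead of an mfence, and the patch below changes which stack offset that locked op targets.

```cpp
#include <atomic>

// Illustrative only: both of these can be lowered on x86-64 to a locked
// read-modify-write of a stack location rather than an mfence; the commit
// below changes *which* stack offset that locked op uses.
void full_fence() {
  std::atomic_thread_fence(std::memory_order_seq_cst);  // seq_cst fence
}

int idempotent_rmw(std::atomic<int> &x) {
  return x.fetch_or(0, std::memory_order_seq_cst);      // idempotent RMW
}
```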
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 22 |

1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 43911a1b016..7e05fccd338 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26292,21 +26292,31 @@ static SDValue emitLockedStackOp(SelectionDAG &DAG,
   // here since it doesn't require an extra register.
   // 3) OR appears to be very slightly faster than ADD. (Though, the difference
   // is small enough it might just be measurement noise.)
-  // 4) For the moment, we are using top of stack. This creates false sharing
-  // with actual stack access/call sequences, and it would be better to use a
-  // location within the redzone. For the moment, this is still better than an
-  // mfence though. TODO: Revise the offset used when we can assume a redzone.
+  // 4) When choosing offsets, there are several contributing factors:
+  //   a) If there's no redzone, we default to TOS. (We could allocate a cache
+  //   line aligned stack object to improve this case.)
+  //   b) To minimize our chances of introducing a false dependence, we prefer
+  //   to offset the stack usage from TOS slightly.
+  //   c) To minimize concerns about cross thread stack usage - in particular,
+  //   the idiomatic MyThreadPool.run([&StackVars]() {...}) pattern which
+  //   captures state in the TOS frame and accesses it from many threads -
+  //   we want to use an offset such that the offset is in a distinct cache
+  //   line from the TOS frame.
   //
   // For a general discussion of the tradeoffs and benchmark results, see:
   // https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
+  auto &MF = DAG.getMachineFunction();
+  auto &TFL = *Subtarget.getFrameLowering();
+  const unsigned SPOffset = TFL.has128ByteRedZone(MF) ? -64 : 0;
+
   if (Subtarget.is64Bit()) {
     SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
     SDValue Ops[] = {
       DAG.getRegister(X86::RSP, MVT::i64),                  // Base
       DAG.getTargetConstant(1, DL, MVT::i8),                // Scale
       DAG.getRegister(0, MVT::i64),                         // Index
-      DAG.getTargetConstant(0, DL, MVT::i32),               // Disp
+      DAG.getTargetConstant(SPOffset, DL, MVT::i32),        // Disp
       DAG.getRegister(0, MVT::i16),                         // Segment.
       Zero,
       Chain};
@@ -26320,7 +26330,7 @@ static SDValue emitLockedStackOp(SelectionDAG &DAG,
       DAG.getRegister(X86::ESP, MVT::i32),                  // Base
       DAG.getTargetConstant(1, DL, MVT::i8),                // Scale
       DAG.getRegister(0, MVT::i32),                         // Index
-      DAG.getTargetConstant(0, DL, MVT::i32),               // Disp
+      DAG.getTargetConstant(SPOffset, DL, MVT::i32),        // Disp
       DAG.getRegister(0, MVT::i16),                         // Segment.
       Zero,
       Chain
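To summarize the displacement choice introduced above, here is a small standalone C++ sketch; the function and constant names are hypothetical, and only the -64 vs. 0 decision mirrors the committed code. When the SysV 128-byte red zone is guaranteed, the locked op can safely write below RSP, and -64 puts it in a different cache line than the live top-of-stack data; without a red zone it falls back to the top of stack.

```cpp
#include <cstdint>

// Hypothetical standalone mirror of the SPOffset computation in the patch:
// writing below RSP is only safe when the 128-byte red zone is guaranteed,
// and -64 keeps the locked RMW in a separate cache line from the TOS frame.
int32_t lockedStackOpDisplacement(bool has128ByteRedZone) {
  constexpr int32_t kRedZoneDisp = -64;  // distinct cache line below TOS
  constexpr int32_t kTopOfStack = 0;     // fallback when no red zone exists
  return has128ByteRedZone ? kRedZoneDisp : kTopOfStack;
}
```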