 compiler-rt/lib/xray/xray_fdr_log_writer.h | 22 ++++++++++++++++++++++
 compiler-rt/lib/xray/xray_fdr_logging.cc   |  7 +++++++
 2 files changed, 29 insertions(+), 0 deletions(-)
diff --git a/compiler-rt/lib/xray/xray_fdr_log_writer.h b/compiler-rt/lib/xray/xray_fdr_log_writer.h
index 67675ec3d12..c18ff7bbf0b 100644
--- a/compiler-rt/lib/xray/xray_fdr_log_writer.h
+++ b/compiler-rt/lib/xray/xray_fdr_log_writer.h
@@ -63,6 +63,10 @@ class FDRLogWriter {
template <class T> void writeRecord(const T &R) {
internal_memcpy(NextRecord, reinterpret_cast<const char *>(&R), sizeof(T));
NextRecord += sizeof(T);
+ // We need this atomic fence here to ensure that other threads attempting to
+ // read the bytes in the buffer will see the writes committed before the
+ // extents are updated.
+ atomic_thread_fence(memory_order_release);
atomic_fetch_add(&Buffer.Extents, sizeof(T), memory_order_acq_rel);
}
@@ -89,6 +93,10 @@ public:
constexpr auto Size = sizeof(MetadataRecord) * N;
internal_memcpy(NextRecord, reinterpret_cast<const char *>(Recs), Size);
NextRecord += Size;
+ // We need this atomic fence here to ensure that other threads attempting to
+ // read the bytes in the buffer will see the writes committed before the
+ // extents are updated.
+ atomic_thread_fence(memory_order_release);
atomic_fetch_add(&Buffer.Extents, Size, memory_order_acq_rel);
return Size;
}
@@ -129,6 +137,10 @@ public:
NextRecord = reinterpret_cast<char *>(internal_memcpy(
NextRecord, reinterpret_cast<char *>(&A), sizeof(A))) +
sizeof(A);
+ // We need this atomic fence here to ensure that other threads attempting to
+ // read the bytes in the buffer will see the writes committed before the
+ // extents are updated.
+ atomic_thread_fence(memory_order_release);
atomic_fetch_add(&Buffer.Extents, sizeof(R) + sizeof(A),
memory_order_acq_rel);
return true;
@@ -149,6 +161,11 @@ public:
NextRecord = reinterpret_cast<char *>(
internal_memcpy(NextRecord, Event, EventSize)) +
EventSize;
+
+ // We need this atomic fence here to ensure that other threads attempting to
+ // read the bytes in the buffer will see the writes committed before the
+ // extents are updated.
+ atomic_thread_fence(memory_order_release);
atomic_fetch_add(&Buffer.Extents, sizeof(R) + EventSize,
memory_order_acq_rel);
return true;
@@ -167,6 +184,11 @@ public:
NextRecord = reinterpret_cast<char *>(
internal_memcpy(NextRecord, Event, EventSize)) +
EventSize;
+
+ // We need this atomic fence here to ensure that other threads attempting to
+ // read the bytes in the buffer will see the writes committed before the
+ // extents are updated.
+ atomic_thread_fence(memory_order_release);
atomic_fetch_add(&Buffer.Extents, EventSize, memory_order_acq_rel);
return true;
}
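
Every writer-side hunk above follows the same pattern: copy the record bytes into the buffer, issue a release fence, then advance Buffer.Extents. Below is a minimal standalone sketch of that pattern using std::atomic rather than the sanitizer-internal atomic helpers; the Buffer layout and function name are illustrative, not the actual XRay types, and it assumes a single writer per buffer as in the FDR runtime.

#include <atomic>
#include <cstddef>
#include <cstring>

// Illustrative stand-in for the FDR buffer: raw bytes plus an atomic count
// of how many of those bytes have been published.
struct Buffer {
  char Data[4096];
  std::atomic<size_t> Extents{0};
};

// Copy a record into the buffer, then publish it by advancing Extents.
// Assumes this thread is the only writer of B.
void writeRecord(Buffer &B, const void *Rec, size_t Size) {
  size_t Off = B.Extents.load(std::memory_order_relaxed);
  std::memcpy(B.Data + Off, Rec, Size);
  // Release fence: the bytes copied above must become visible to other
  // threads no later than the new Extents value does.
  std::atomic_thread_fence(std::memory_order_release);
  B.Extents.fetch_add(Size, std::memory_order_acq_rel);
}
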
diff --git a/compiler-rt/lib/xray/xray_fdr_logging.cc b/compiler-rt/lib/xray/xray_fdr_logging.cc
index 83f4f97a2b4..f1eca5ce926 100644
--- a/compiler-rt/lib/xray/xray_fdr_logging.cc
+++ b/compiler-rt/lib/xray/xray_fdr_logging.cc
@@ -243,6 +243,13 @@ XRayBuffer fdrIterator(const XRayBuffer B) {
// out to disk. The difference here would be that we still write "empty"
// buffers, or at least go through the iterators faithfully to let the
// handlers see the empty buffers in the queue.
+ //
+ // We need this atomic fence here to ensure that writes happening to the
+ // buffer have been committed before we load the extents atomically. Because
+ // the buffer is not explicitly synchronised across threads, we rely on the
+ // fence ordering to ensure that writes we expect to have been completed
+ // before the fence are fully committed before we read the extents.
+ atomic_thread_fence(memory_order_acquire);
auto BufferSize = atomic_load(&It->Extents, memory_order_acquire);
SerializedBufferSize = BufferSize + sizeof(MetadataRecord);
CurrentBuffer = allocateBuffer(SerializedBufferSize);
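
On the reader side, the acquire ordering pairs with the writer's release fence: once the iterator observes a given Extents value, every byte that value covers is visible to the reading thread. A sketch of that consuming side, under the same illustrative Buffer type as in the writer sketch above (an acquire load of Extents is what establishes the pairing here; the patch additionally places an explicit acquire fence before the load):

#include <atomic>
#include <cstddef>
#include <cstring>

struct Buffer {
  char Data[4096];
  std::atomic<size_t> Extents{0};
};

// Copy every byte the writer has published so far into Out and return the count.
size_t readPublished(const Buffer &B, char *Out) {
  // Acquire load: pairs with the writer's release fence, so all bytes covered
  // by the observed Extents value are visible before the memcpy below runs.
  size_t N = B.Extents.load(std::memory_order_acquire);
  std::memcpy(Out, B.Data, N);
  return N;
}
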