 llvm/lib/Bitcode/Writer/ValueEnumerator.cpp              | 21
 llvm/lib/Bitcode/Writer/ValueEnumerator.h                | 18
 llvm/test/Bitcode/mdnodes-distinct-in-post-order.ll      | 24
 llvm/test/Bitcode/mdnodes-distinct-nodes-break-cycles.ll | 29
 4 files changed, 91 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index 72b0048b031..6bf95b12d80 100644
--- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -567,6 +567,12 @@ void ValueEnumerator::dropFunctionFromMetadata(
 }
 
 void ValueEnumerator::EnumerateMetadata(unsigned F, const Metadata *MD) {
+  // It's vital for reader efficiency that uniqued subgraphs are done in
+  // post-order; it's expensive when their operands have forward references.
+  // If a distinct node is referenced from a uniqued node, it'll be delayed
+  // until the uniqued subgraph has been completely traversed.
+  SmallVector<const MDNode *, 32> DelayedDistinctNodes;
+
   // Start by enumerating MD, and then work through its transitive operands in
   // post-order.  This requires a depth-first search.
   SmallVector<std::pair<const MDNode *, MDNode::op_iterator>, 32> Worklist;
@@ -584,7 +590,12 @@ void ValueEnumerator::EnumerateMetadata(unsigned F, const Metadata *MD) {
     if (I != N->op_end()) {
       auto *Op = cast<MDNode>(*I);
       Worklist.back().second = ++I;
-      Worklist.push_back(std::make_pair(Op, Op->op_begin()));
+
+      // Delay traversing Op if it's a distinct node and N is uniqued.
+      if (Op->isDistinct() && !N->isDistinct())
+        DelayedDistinctNodes.push_back(Op);
+      else
+        Worklist.push_back(std::make_pair(Op, Op->op_begin()));
       continue;
     }
 
@@ -592,6 +603,14 @@ void ValueEnumerator::EnumerateMetadata(unsigned F, const Metadata *MD) {
     Worklist.pop_back();
     MDs.push_back(N);
     MetadataMap[N].ID = MDs.size();
+
+    // Flush out any delayed distinct nodes; these are all the distinct nodes
+    // that are leaves in the last uniqued subgraph.
+    if (Worklist.empty() || Worklist.back().first->isDistinct()) {
+      for (const MDNode *N : DelayedDistinctNodes)
+        Worklist.push_back(std::make_pair(N, N->op_begin()));
+      DelayedDistinctNodes.clear();
+    }
   }
 }
 
diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.h b/llvm/lib/Bitcode/Writer/ValueEnumerator.h
index 14407bed03f..bff2de70b3e 100644
--- a/llvm/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.h
@@ -256,8 +256,26 @@ private:
   const MDNode *enumerateMetadataImpl(unsigned F, const Metadata *MD);
   unsigned getMetadataFunctionID(const Function *F) const;
+
+  /// Enumerate reachable metadata in (almost) post-order.
+  ///
+  /// Enumerate all the metadata reachable from MD.  We want to minimize the
+  /// cost of reading bitcode records, and so the primary consideration is that
+  /// operands of uniqued nodes are resolved before the nodes are read.  This
+  /// avoids re-uniquing them on the context and factors away RAUW support.
+  ///
+  /// This algorithm guarantees that subgraphs of uniqued nodes are in
+  /// post-order.  Distinct subgraphs reachable only from a single uniqued node
+  /// will be in post-order.
+  ///
+  /// \note The relative order of a distinct and uniqued node is irrelevant.
+  /// \a organizeMetadata() will later partition distinct nodes ahead of
+  /// uniqued ones.
+  ///{
   void EnumerateMetadata(const Function *F, const Metadata *MD);
   void EnumerateMetadata(unsigned F, const Metadata *MD);
+  ///}
+
   void EnumerateFunctionLocalMetadata(const Function &F,
                                       const LocalAsMetadata *Local);
   void EnumerateFunctionLocalMetadata(unsigned F,
                                       const LocalAsMetadata *Local);
diff --git a/llvm/test/Bitcode/mdnodes-distinct-in-post-order.ll b/llvm/test/Bitcode/mdnodes-distinct-in-post-order.ll
new file mode 100644
index 00000000000..6e6ba604235
--- /dev/null
+++ b/llvm/test/Bitcode/mdnodes-distinct-in-post-order.ll
@@ -0,0 +1,24 @@
+; RUN: llvm-as <%s | llvm-bcanalyzer -dump | FileCheck %s
+; Check that distinct nodes are emitted in post-order to avoid unnecessary
+; forward references.
+
+; Nodes in this testcase are numbered to match how they are referenced in
+; bitcode.  !3 is referenced as opN=3.
+
+; The leaves should come first (in either order).
+; CHECK: <DISTINCT_NODE/>
+; CHECK-NEXT: <DISTINCT_NODE/>
+!1 = distinct !{}
+!2 = distinct !{}
+
+; CHECK-NEXT: <DISTINCT_NODE op0=1 op1=2/>
+!3 = distinct !{!1, !2}
+
+; CHECK-NEXT: <DISTINCT_NODE op0=1 op1=3 op2=2/>
+!4 = distinct !{!1, !3, !2}
+
+; Note: named metadata nodes cannot reference null, so their operands are
+; numbered off-by-one.
+; CHECK-NEXT: <NAME
+; CHECK-NEXT: <NAMED_NODE op0=3/>
+!named = !{!4}
diff --git a/llvm/test/Bitcode/mdnodes-distinct-nodes-break-cycles.ll b/llvm/test/Bitcode/mdnodes-distinct-nodes-break-cycles.ll
new file mode 100644
index 00000000000..51701d10c03
--- /dev/null
+++ b/llvm/test/Bitcode/mdnodes-distinct-nodes-break-cycles.ll
@@ -0,0 +1,29 @@
+; RUN: llvm-as <%s | llvm-bcanalyzer -dump | FileCheck %s
+; Check that distinct nodes break uniquing cycles, so that uniqued subgraphs
+; are always in post-order.
+;
+; It may not be immediately obvious why this is an interesting graph.  There
+; are three nodes in a cycle, and one of them (!1) is distinct.  Because the
+; entry point is !2, a naive post-order traversal would give !3, !1, !2; but
+; this means when !3 is parsed the reader will need a forward reference for !2.
+; Forward references for uniqued node operands are expensive, whereas they're
+; cheap for distinct node operands.  If the distinct node is emitted first, the
+; uniqued nodes don't need any forward references at all.
+
+; Nodes in this testcase are numbered to match how they are referenced in
+; bitcode.  !3 is referenced as opN=3.
+
+; CHECK: <DISTINCT_NODE op0=3/>
+!1 = distinct !{!3}
+
+; CHECK-NEXT: <NODE op0=1/>
+!2 = !{!1}
+
+; CHECK-NEXT: <NODE op0=2/>
+!3 = !{!2}
+
+; Note: named metadata nodes cannot reference null, so their operands are
+; numbered off-by-one.
+; CHECK-NEXT: <NAME
+; CHECK-NEXT: <NAMED_NODE op0=1/>
+!named = !{!2}
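
The change above is one twist on an ordinary iterative depth-first walk: while descending through a uniqued subgraph, a distinct operand is parked in DelayedDistinctNodes instead of being traversed, and the parked nodes are only pushed onto the worklist once the surrounding uniqued subgraph has been fully popped. The standalone C++ sketch below illustrates that idea on the cycle from mdnodes-distinct-nodes-break-cycles.ll. It is not the LLVM implementation: the Node struct, the enumerate() function, and the final stable_partition (standing in here for what organizeMetadata() does later) are hypothetical names used only for this example.

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

// Hypothetical stand-in for an MDNode-like graph node (not the LLVM type):
// distinct nodes are kept by identity, while uniqued nodes are re-uniqued by
// content when read back, so their operands should be resolved before they
// are read.
struct Node {
  bool Distinct = false;
  std::vector<const Node *> Ops;
  const char *Name = "";
};

// Sketch of the delayed-distinct-node walk: uniqued subgraphs come out in
// strict post-order; a distinct operand reached from a uniqued node is parked
// and only traversed once the current uniqued subgraph has been finished.
static void enumerate(const Node *Root, std::vector<const Node *> &Order) {
  std::vector<const Node *> DelayedDistinctNodes;
  std::vector<std::pair<const Node *, size_t>> Worklist{{Root, 0}};
  std::vector<const Node *> Seen{Root};

  auto AlreadySeen = [&](const Node *N) {
    return std::find(Seen.begin(), Seen.end(), N) != Seen.end();
  };

  while (!Worklist.empty()) {
    const Node *N = Worklist.back().first;
    size_t I = Worklist.back().second;

    if (I < N->Ops.size()) {
      const Node *Op = N->Ops[I];
      Worklist.back().second = I + 1;
      if (AlreadySeen(Op))
        continue; // already enumerated or already scheduled (handles cycles)
      Seen.push_back(Op);

      // Delay traversing Op if it's a distinct node and N is uniqued.
      if (Op->Distinct && !N->Distinct)
        DelayedDistinctNodes.push_back(Op);
      else
        Worklist.push_back({Op, 0});
      continue;
    }

    // All of N's operands have been handled; N takes the next position.
    Worklist.pop_back();
    Order.push_back(N);

    // Flush the parked distinct nodes once the uniqued subgraph is done.
    if (Worklist.empty() || Worklist.back().first->Distinct) {
      for (const Node *D : DelayedDistinctNodes)
        Worklist.push_back({D, 0});
      DelayedDistinctNodes.clear();
    }
  }
}

int main() {
  // The cycle from mdnodes-distinct-nodes-break-cycles.ll: !1 is distinct,
  // !2 and !3 are uniqued, and enumeration starts at !2 (reached via !named).
  Node N1, N2, N3;
  N1.Distinct = true; N1.Ops = {&N3}; N1.Name = "!1";
  N2.Ops = {&N1};     N2.Name = "!2";
  N3.Ops = {&N2};     N3.Name = "!3";

  std::vector<const Node *> Order;
  enumerate(&N2, Order);

  // Stand-in for organizeMetadata(): distinct nodes are partitioned ahead of
  // uniqued ones before IDs are assigned.
  std::stable_partition(Order.begin(), Order.end(),
                        [](const Node *N) { return N->Distinct; });

  for (size_t ID = 0; ID < Order.size(); ++ID)
    std::printf("ID %zu: %s\n", ID + 1, Order[ID]->Name); // !1, !2, !3
  return 0;
}

Run as written, the sketch enumerates !2, !3, !1 and then partitions the distinct node to the front, printing IDs 1 through 3 for !1, !2, !3. That matches the records the test checks for: only the distinct !1 carries a forward reference (op0=3), while the uniqued !2 and !3 refer only backwards.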