Divergence analysis for GPU programs

Summary: Some optimizations such as jump threading and loop unswitching can negatively affect performance when applied to divergent branches. The divergence analysis added in this patch conservatively estimates which branches in a GPU program can diverge. This information can then help LLVM to run certain optimizations selectively. Test Plan: test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll Reviewers: resistor, hfinkel, eliben, meheff, jholewinski Subscribers: broune, bjarke.roune, madhur13490, tstellarAMD, dberlin, echristo, jholewinski, llvm-commits Differential Revision: http://reviews.llvm.org/D8576 llvm-svn: 234567
author: Jingyue Wu <jingyue@google.com> 2015-04-10 05:03:50 +0000
committer: Jingyue Wu <jingyue@google.com> 2015-04-10 05:03:50 +0000
commit: 5da831cc3109ff1fc5c8f43be2cf1a1f52e030da (patch)
tree: bc580cdb0efc9fce2e03473d920d295c31a16e78 /llvm/lib/Target
parent: 5c65f58f6487dd5d9dec43d927dd9842e8b72ba2 (diff)
download: bcm5719-llvm-5da831cc3109ff1fc5c8f43be2cf1a1f52e030da.tar.gz
bcm5719-llvm-5da831cc3109ff1fc5c8f43be2cf1a1f52e030da.zip
2 files changed, 72 insertions, 0 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index b8af04de24a..dc81802f4b5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "NVPTXTargetTransformInfo.h"
+#include "NVPTXUtilities.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -19,6 +20,75 @@ using namespace llvm;
 
 #define DEBUG_TYPE "NVPTXtti"
 
+// True iff II is an nvvm_read_ptx_sreg_tid_{x,y,z} read, i.e. threadIdx.x/y/z.
+static bool readsThreadIndex(const IntrinsicInst *II) {
+  switch (II->getIntrinsicID()) {
+    default: return false; // every other intrinsic ID
+    case Intrinsic::nvvm_read_ptx_sreg_tid_x:
+    case Intrinsic::nvvm_read_ptx_sreg_tid_y:
+    case Intrinsic::nvvm_read_ptx_sreg_tid_z:
+      return true;
+  }
+}
+
+static bool readsLaneId(const IntrinsicInst *II) { // Is II a lane-ID read?
+  return II->getIntrinsicID() == Intrinsic::ptx_read_laneid;
+}
+
+// Whether II is one of the PTX atomic intrinsics enumerated in the switch.
+static bool isNVVMAtomic(const IntrinsicInst *II) {
+  switch (II->getIntrinsicID()) {
+    default: return false; // every other intrinsic ID
+    case Intrinsic::nvvm_atomic_load_add_f32:
+    case Intrinsic::nvvm_atomic_load_inc_32:
+    case Intrinsic::nvvm_atomic_load_dec_32:
+      return true;
+  }
+}
+
+bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
+  // Without inter-procedural analysis, conservatively assume that arguments of
+  // non-kernel (__device__) functions are divergent; kernel arguments are not.
+  if (const Argument *Arg = dyn_cast<Argument>(V))
+    return !isKernelFunction(*Arg->getParent());
+
+  if (const Instruction *I = dyn_cast<Instruction>(V)) {
+    // Without pointer analysis, conservatively treat loads from the generic or
+    // local address space as divergent. Every load is decided here.
+    if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+      unsigned AS = LI->getPointerAddressSpace();
+      return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL;
+    }
+    // Atomic instructions may cause divergence. Atomic instructions are
+    // executed sequentially across all threads in a warp. Therefore, an earlier
+    // executed thread may see different memory inputs than a later executed
+    // thread. For example, suppose *a = 0 initially.
+    //
+    //   atom.global.add.s32 d, [a], 1
+    //
+    // returns 0 for the first thread that enters the critical region, and 1 for
+    // the second thread.
+    if (I->isAtomic())
+      return true;
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+      // Reads of threadIdx or the lane ID are obviously divergent.
+      if (readsThreadIndex(II) || readsLaneId(II))
+        return true;
+      // Handle the NVPTX atomic intrinsics that cannot be represented as an
+      // atomic IR instruction.
+      if (isNVVMAtomic(II))
+        return true;
+    }
+    // Conservatively consider the return value of function calls as divergent.
+    // We could analyze callees with bodies more precisely using
+    // inter-procedural analysis.
+    if (isa<CallInst>(I))
+      return true;
+  }
+
+  return false; // Nothing above matched: not a source of divergence.
+}
+
 unsigned NVPTXTTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index bf21e881b4b..4280888988f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -61,6 +61,8 @@ public:
 
   bool hasBranchDivergence() { return true; }
 
+  bool isSourceOfDivergence(const Value *V);
+
   unsigned getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
author	Jingyue Wu <jingyue@google.com>	2015-04-10 05:03:50 +0000
committer	Jingyue Wu <jingyue@google.com>	2015-04-10 05:03:50 +0000
commit	5da831cc3109ff1fc5c8f43be2cf1a1f52e030da (patch)
tree	bc580cdb0efc9fce2e03473d920d295c31a16e78 /llvm/lib/Target
parent	5c65f58f6487dd5d9dec43d927dd9842e8b72ba2 (diff)
download	bcm5719-llvm-5da831cc3109ff1fc5c8f43be2cf1a1f52e030da.tar.gz bcm5719-llvm-5da831cc3109ff1fc5c8f43be2cf1a1f52e030da.zip