From 5da831cc3109ff1fc5c8f43be2cf1a1f52e030da Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Fri, 10 Apr 2015 05:03:50 +0000 Subject: Divergence analysis for GPU programs Summary: Some optimizations such as jump threading and loop unswitching can negatively affect performance when applied to divergent branches. The divergence analysis added in this patch conservatively estimates which branches in a GPU program can diverge. This information can then help LLVM to run certain optimizations selectively. Test Plan: test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll Reviewers: resistor, hfinkel, eliben, meheff, jholewinski Subscribers: broune, bjarke.roune, madhur13490, tstellarAMD, dberlin, echristo, jholewinski, llvm-commits Differential Revision: http://reviews.llvm.org/D8576 llvm-svn: 234567 --- llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 70 ++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp') diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index b8af04de24a..dc81802f4b5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "NVPTXTargetTransformInfo.h" +#include "NVPTXUtilities.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -19,6 +20,75 @@ using namespace llvm; #define DEBUG_TYPE "NVPTXtti" +// Whether the given intrinsic reads threadIdx.x/y/z. +static bool readsThreadIndex(const IntrinsicInst *II) { + switch (II->getIntrinsicID()) { + default: return false; + case Intrinsic::nvvm_read_ptx_sreg_tid_x: + case Intrinsic::nvvm_read_ptx_sreg_tid_y: + case Intrinsic::nvvm_read_ptx_sreg_tid_z: + return true; + } +} + +static bool readsLaneId(const IntrinsicInst *II) { + return II->getIntrinsicID() == Intrinsic::ptx_read_laneid; +} + +// Whether the given intrinsic is an atomic instruction in PTX. +static bool isNVVMAtomic(const IntrinsicInst *II) { + switch (II->getIntrinsicID()) { + default: return false; + case Intrinsic::nvvm_atomic_load_add_f32: + case Intrinsic::nvvm_atomic_load_inc_32: + case Intrinsic::nvvm_atomic_load_dec_32: + return true; + } +} + +bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) { + // Without inter-procedural analysis, we conservatively assume that arguments + // to __device__ functions are divergent. + if (const Argument *Arg = dyn_cast(V)) + return !isKernelFunction(*Arg->getParent()); + + if (const Instruction *I = dyn_cast(V)) { + // Without pointer analysis, we conservatively assume values loaded from + // generic or local address space are divergent. + if (const LoadInst *LI = dyn_cast(I)) { + unsigned AS = LI->getPointerAddressSpace(); + return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL; + } + // Atomic instructions may cause divergence. Atomic instructions are + // executed sequentially across all threads in a warp. Therefore, an earlier + // executed thread may see different memory inputs than a later executed + // thread. For example, suppose *a = 0 initially. + // + // atom.global.add.s32 d, [a], 1 + // + // returns 0 for the first thread that enters the critical region, and 1 for + // the second thread. + if (I->isAtomic()) + return true; + if (const IntrinsicInst *II = dyn_cast(I)) { + // Instructions that read threadIdx are obviously divergent. + if (readsThreadIndex(II) || readsLaneId(II)) + return true; + // Handle the NVPTX atomic instrinsics that cannot be represented as an + // atomic IR instruction. + if (isNVVMAtomic(II)) + return true; + } + // Conservatively consider the return value of function calls as divergent. + // We could analyze callees with bodies more precisely using + // inter-procedural analysis. + if (isa(I)) + return true; + } + + return false; +} + unsigned NVPTXTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, -- cgit v1.2.3