diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 29 |
1 files changed, 28 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 9fb68a38415..e48c55457b0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -29,6 +29,10 @@ using namespace llvm; #define DEBUG_TYPE "AMDGPUtti" +static cl::opt<unsigned> UnrollThresholdPrivate( + "amdgpu-unroll-threshold-private", + cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"), + cl::init(800), cl::Hidden); void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) { @@ -38,6 +42,8 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, // TODO: Do we want runtime unrolling? + // Maximum alloca size than can fit registers. Reserve 16 registers. + const unsigned MaxAlloca = (256 - 16) * 4; for (const BasicBlock *BB : L->getBlocks()) { const DataLayout &DL = BB->getModule()->getDataLayout(); for (const Instruction &I : *BB) { @@ -49,6 +55,26 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL)); if (Alloca) { + Type *Ty = Alloca->getAllocatedType(); + unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0; + if (AllocaSize > MaxAlloca) + continue; + + // Check if GEP depends on a value defined by this loop itself. + bool HasLoopDef = false; + for (const Value *Op : GEP->operands()) { + const Instruction *Inst = dyn_cast<Instruction>(Op); + if (!Inst || L->isLoopInvariant(Op)) + continue; + if (any_of(L->getSubLoops(), [Inst](const Loop* SubLoop) { + return SubLoop->contains(Inst); })) + continue; + HasLoopDef = true; + break; + } + if (!HasLoopDef) + continue; + // We want to do whatever we can to limit the number of alloca // instructions that make it through to the code generator. allocas // require us to use indirect addressing, which is slow and prone to @@ -59,7 +85,8 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, // // Don't use the maximum allowed value here as it will make some // programs way too big. - UP.Threshold = 800; + UP.Threshold = UnrollThresholdPrivate; + return; } } } |