summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2014-01-23 18:49:28 +0000
committerTom Stellard <thomas.stellard@amd.com>2014-01-23 18:49:28 +0000
commit8cce9bdf179c29c3652d4862d6317d7e2e7a411c (patch)
tree3853f3f2a08df6448a6fa1ecfb3778ee31a1da1c /llvm/lib
parentb3500e606195e9026d292a1cdd3faede6be7b0c2 (diff)
downloadbcm5719-llvm-8cce9bdf179c29c3652d4862d6317d7e2e7a411c.tar.gz
bcm5719-llvm-8cce9bdf179c29c3652d4862d6317d7e2e7a411c.zip
R600: Unconditionally unroll loops that contain GEPs with alloca pointers
Implement the getUnrollingPreferences() function for AMDGPUTargetTransformInfo so that loops that do address calculations on pointers derived from alloca are unconditionally unrolled. Unrolling these loops makes it more likely that SROA will be able to eliminate the allocas, which is a big win for R600 since memory allocated by alloca (private memory) is really slow. llvm-svn: 199916
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp29
1 files changed, 29 insertions, 0 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
index ca1e0b65289..a4feec71312 100644
--- a/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
@@ -18,7 +18,9 @@
#define DEBUG_TYPE "AMDGPUtti"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
@@ -73,6 +75,8 @@ public:
virtual bool hasBranchDivergence() const;
+ virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
+
/// @}
};
@@ -88,3 +92,28 @@ llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) {
}
bool AMDGPUTTI::hasBranchDivergence() const { return true; }
+
+void AMDGPUTTI::getUnrollingPreferences(Loop *L,
+ UnrollingPreferences &UP) const {
+ for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end();
+ BI != BE; ++BI) {
+ BasicBlock *BB = *BI;
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I);
+ if (!GEP)
+ continue;
+ const Value *Ptr = GEP->getPointerOperand();
+ const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr));
+ if (Alloca) {
+ // We want to do whatever we can to limit the number of alloca
+ // instructions that make it through to the code generator. allocas
+ // require us to use indirect addressing, which is slow and prone to
+ // compiler bugs. If this loop does an address calculation on an
+ // alloca ptr, then we want to unconditionally unroll the loop. In most
+ // cases, this will make it possible for SROA to eliminate these allocas.
+ UP.Threshold = UINT_MAX;
+ }
+ }
+ }
+}
OpenPOWER on IntegriCloud