diff options
| -rw-r--r-- | llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 141 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll | 50 |
2 files changed, 183 insertions, 8 deletions
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index bccd8164ed4..44ca2b78b38 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -157,6 +157,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" @@ -324,7 +327,9 @@ public: AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<ScalarEvolutionWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); AU.setPreservesCFG(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } bool doInitialization(Module &M) override { @@ -395,10 +400,20 @@ private: /// Verify F is free of dead code. void verifyNoDeadCode(Function &F); + bool hasMoreThanOneUseInLoop(Value *v, Loop *L); + // Swap the index operand of two GEP. + void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second); + // Check if it is safe to swap operand of two GEP. + bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second, + Loop *CurLoop); + const DataLayout *DL; DominatorTree *DT; ScalarEvolution *SE; const TargetMachine *TM; + + LoopInfo *LI; + TargetLibraryInfo *TLI; /// Whether to lower a GEP with multiple indices into arithmetic operations or /// multiple GEPs with a single index. bool LowerGEP; @@ -414,6 +429,8 @@ INITIALIZE_PASS_BEGIN( INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END( SeparateConstOffsetFromGEP, "separate-const-offset-from-gep", "Split GEPs to a variadic base and a constant offset for better CSE", false, @@ -756,6 +773,13 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( Type *I8PtrTy = Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace()); Value *ResultPtr = Variadic->getOperand(0); + Loop *L = LI->getLoopFor(Variadic->getParent()); + // Check if the base is not loop invariant or used more than once. + bool isSwapCandidate = + L && L->isLoopInvariant(ResultPtr) && + !hasMoreThanOneUseInLoop(ResultPtr, L); + Value *FirstResult = nullptr; + if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); @@ -784,6 +808,8 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( // Create an ugly GEP with a single index for each index. ResultPtr = Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep"); + if (FirstResult == nullptr) + FirstResult = ResultPtr; } } @@ -792,7 +818,17 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset); ResultPtr = Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep"); - } + } else + isSwapCandidate = false; + + // If we created a GEP with constant index, and the base is loop invariant, + // then we swap the first one with it, so LICM can move constant GEP out + // later. + GetElementPtrInst *FirstGEP = dyn_cast<GetElementPtrInst>(FirstResult); + GetElementPtrInst *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr); + if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L)) + swapGEPOperand(FirstGEP, SecondGEP); + if (ResultPtr->getType() != Variadic->getType()) ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType()); @@ -1036,16 +1072,15 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) { DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); bool Changed = false; for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) { - for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) { - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++)) { + for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE;) + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++)) Changed |= splitGEP(GEP); - } - // No need to split GEP ConstantExprs because all its indices are constant - // already. - } + // No need to split GEP ConstantExprs because all its indices are constant + // already. } Changed |= reuniteExts(F); @@ -1138,3 +1173,93 @@ void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) { } } } + +bool SeparateConstOffsetFromGEP::isLegalToSwapOperand( + GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) { + if (!FirstGEP || !FirstGEP->hasOneUse()) + return false; + + if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent()) + return false; + + if (FirstGEP == SecondGEP) + return false; + + unsigned FirstNum = FirstGEP->getNumOperands(); + unsigned SecondNum = SecondGEP->getNumOperands(); + // Give up if the number of operands are not 2. + if (FirstNum != SecondNum || FirstNum != 2) + return false; + + Value *FirstBase = FirstGEP->getOperand(0); + Value *SecondBase = SecondGEP->getOperand(0); + Value *FirstOffset = FirstGEP->getOperand(1); + // Give up if the index of the first GEP is loop invariant. + if (CurLoop->isLoopInvariant(FirstOffset)) + return false; + + // Give up if base doesn't have same type. + if (FirstBase->getType() != SecondBase->getType()) + return false; + + Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset); + + // Check if the second operand of first GEP has constant coefficient. + // For an example, for the following code, we won't gain anything by + // hoisting the second GEP out because the second GEP can be folded away. + // %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256 + // %67 = shl i64 %scevgep.sum.ur159, 2 + // %uglygep160 = getelementptr i8* %65, i64 %67 + // %uglygep161 = getelementptr i8* %uglygep160, i64 -1024 + + // Skip constant shift instruction which may be generated by Splitting GEPs. + if (FirstOffsetDef && FirstOffsetDef->isShift() && + dyn_cast<ConstantInt>(FirstOffsetDef->getOperand(1))) + FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0)); + + // Give up if FirstOffsetDef is an Add or Sub with constant. + // Because it may not profitable at all due to constant folding. + if (FirstOffsetDef) + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) { + unsigned opc = BO->getOpcode(); + if ((opc == Instruction::Add || opc == Instruction::Sub) && + (dyn_cast<ConstantInt>(BO->getOperand(0)) || + dyn_cast<ConstantInt>(BO->getOperand(1)))) + return false; + } + return true; +} + +bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) { + int UsesInLoop = 0; + for (User *U : V->users()) { + if (Instruction *User = dyn_cast<Instruction>(U)) + if (L->contains(User)) + if (++UsesInLoop > 1) + return true; + } + return false; +} + +void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First, + GetElementPtrInst *Second) { + Value *Offset1 = First->getOperand(1); + Value *Offset2 = Second->getOperand(1); + First->setOperand(1, Offset2); + Second->setOperand(1, Offset1); + + // We changed p+o+c to p+c+o, p+c may not be inbound anymore. + const DataLayout &DAL = First->getModule()->getDataLayout(); + APInt Offset(DAL.getPointerSizeInBits( + cast<PointerType>(First->getType())->getAddressSpace()), + 0); + Value *NewBase = + First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset); + uint64_t ObjectSize; + if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) || + Offset.ugt(ObjectSize)) { + First->setIsInBounds(false); + Second->setIsInBounds(false); + } else + First->setIsInBounds(true); +} diff --git a/llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll new file mode 100644 index 00000000000..84277995ce5 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll @@ -0,0 +1,50 @@ +; RUN: llc -O3 -aarch64-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck <%t %s +; REQUIRES: asserts +target triple = "aarch64--linux-android" + +%typeD = type { i32, i32, [256 x i32], [257 x i32] } + +; Function Attrs: noreturn nounwind uwtable +define i32 @test1(%typeD* nocapture %s) { +entry: +; CHECK-LABEL: entry: +; CHECK: %uglygep = getelementptr i8, i8* %0, i64 1032 +; CHECK: br label %do.body.i + + + %tPos = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 0 + %k0 = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 1 + %.pre = load i32, i32* %tPos, align 4 + br label %do.body.i + +do.body.i: +; CHECK-LABEL: do.body.i: +; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3 +; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32* +; CHECK-NOT: %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032 + + + %0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ] + %1 = phi i32 [ 0, %entry ], [ %.be6, %do.body.i.backedge ] + %add.i = add nsw i32 %1, %0 + %shr.i = ashr i32 %add.i, 1 + %idxprom.i = sext i32 %shr.i to i64 + %arrayidx.i = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 3, i64 %idxprom.i + %2 = load i32, i32* %arrayidx.i, align 4 + %cmp.i = icmp sle i32 %2, %.pre + %na.1.i = select i1 %cmp.i, i32 %0, i32 %shr.i + %nb.1.i = select i1 %cmp.i, i32 %shr.i, i32 %1 + %sub.i = sub nsw i32 %na.1.i, %nb.1.i + %cmp1.i = icmp eq i32 %sub.i, 1 + br i1 %cmp1.i, label %fooo.exit, label %do.body.i.backedge + +do.body.i.backedge: + %.be = phi i32 [ %na.1.i, %do.body.i ], [ 256, %fooo.exit ] + %.be6 = phi i32 [ %nb.1.i, %do.body.i ], [ 0, %fooo.exit ] + br label %do.body.i + +fooo.exit: ; preds = %do.body.i + store i32 %nb.1.i, i32* %k0, align 4 + br label %do.body.i.backedge +} + |

