| author | Sanjay Patel <spatel@rotateright.com> | 2019-02-18 23:33:05 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2019-02-18 23:33:05 +0000 |
| commit | d8b4efcb6b4a3408da72e45b4e4225b751065ac9 (patch) | |
| tree | 8a8f3bf8d8c89f055fa94b81f3be94b8c64c7dad /llvm/lib/CodeGen | |
| parent | 05709acba49a0ead78f33487ef377ff2ef376496 (diff) | |
[CGP] form usub with overflow from sub+icmp
The motivating x86 cases for forming the intrinsic are shown in PR31754 and PR40487:
https://bugs.llvm.org/show_bug.cgi?id=31754
https://bugs.llvm.org/show_bug.cgi?id=40487
...and those are shown in the IR test file and x86 codegen file.
Matching the usubo pattern is harder than uaddo because we have 2 independent values rather than a def-use chain.
This adds a TLI hook that should preserve the existing behavior for uaddo formation, but disables usubo
formation by default. Only x86 overrides that setting for now, although other targets will likely benefit
from forming usubo too.
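
For illustration only, here is a sketch of the rewrite this enables; the function and value names are hypothetical rather than copied from the test files, and it assumes a target (x86 here) whose shouldFormOverflowOp hook accepts ISD::USUBO:

```llvm
; Before CodeGenPrepare: the sub and the icmp share operands but are
; otherwise independent instructions (no def-use edge between them).
define i1 @usubo_ult_sketch(i64 %x, i64 %y, i64* %p) {
  %sub = sub i64 %x, %y
  store i64 %sub, i64* %p
  %ov = icmp ult i64 %x, %y      ; a borrow happens exactly when x u< y
  ret i1 %ov
}

; After CodeGenPrepare on an opted-in target, both instructions are
; replaced by one overflow intrinsic plus extractvalues:
;   %t   = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x, i64 %y)
;   %sub = extractvalue { i64, i1 } %t, 0
;   %ov  = extractvalue { i64, i1 } %t, 1
```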
Differential Revision: https://reviews.llvm.org/D57789
llvm-svn: 354298
Diffstat (limited to 'llvm/lib/CodeGen')
| -rw-r--r-- | llvm/lib/CodeGen/CodeGenPrepare.cpp | 91 |
1 file changed, 78 insertions, 13 deletions
```diff
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 14f56279e85..20a2ae1233c 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1162,9 +1162,18 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
 static void replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
                                         Instruction *InsertPt,
                                         Intrinsic::ID IID) {
+  Value *Arg0 = BO->getOperand(0);
+  Value *Arg1 = BO->getOperand(1);
+
+  // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
+  if (BO->getOpcode() == Instruction::Add &&
+      IID == Intrinsic::usub_with_overflow) {
+    assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
+    Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
+  }
+
   IRBuilder<> Builder(InsertPt);
-  Value *MathOV = Builder.CreateBinaryIntrinsic(IID, BO->getOperand(0),
-                                                BO->getOperand(1));
+  Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
   Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
   Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
   BO->replaceAllUsesWith(Math);
@@ -1182,13 +1191,8 @@ static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI,
   if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
     return false;
 
-  // Allow the transform as long as we have an integer type that is not
-  // obviously illegal and unsupported.
-  Type *Ty = Add->getType();
-  if (!isa<IntegerType>(Ty))
-    return false;
-  EVT CodegenVT = TLI.getValueType(DL, Ty);
-  if (!CodegenVT.isSimple() && TLI.isOperationExpand(ISD::UADDO, CodegenVT))
+  if (!TLI.shouldFormOverflowOp(ISD::UADDO,
+                                TLI.getValueType(DL, Add->getType())))
     return false;
 
   // We don't want to move around uses of condition values this late, so we
@@ -1210,6 +1214,64 @@ static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI,
   return true;
 }
 
+static bool combineToUSubWithOverflow(CmpInst *Cmp, const TargetLowering &TLI,
+                                      const DataLayout &DL, bool &ModifiedDT) {
+  // Convert (A u> B) to (A u< B) to simplify pattern matching.
+  Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
+  ICmpInst::Predicate Pred = Cmp->getPredicate();
+  if (Pred == ICmpInst::ICMP_UGT) {
+    std::swap(A, B);
+    Pred = ICmpInst::ICMP_ULT;
+  }
+  // Convert special-case: (A == 0) is the same as (A u< 1).
+  if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
+    B = ConstantInt::get(B->getType(), 1);
+    Pred = ICmpInst::ICMP_ULT;
+  }
+  if (Pred != ICmpInst::ICMP_ULT)
+    return false;
+
+  // Walk the users of a variable operand of a compare looking for a subtract or
+  // add with that same operand. Also match the 2nd operand of the compare to
+  // the add/sub, but that may be a negated constant operand of an add.
+  Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
+  BinaryOperator *Sub = nullptr;
+  for (User *U : CmpVariableOperand->users()) {
+    // A - B, A u< B --> usubo(A, B)
+    if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
+      Sub = cast<BinaryOperator>(U);
+      break;
+    }
+
+    // A + (-C), A u< C (canonicalized form of (sub A, C))
+    const APInt *CmpC, *AddC;
+    if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
+        match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
+      Sub = cast<BinaryOperator>(U);
+      break;
+    }
+  }
+  if (!Sub)
+    return false;
+
+  if (!TLI.shouldFormOverflowOp(ISD::USUBO,
+                                TLI.getValueType(DL, Sub->getType())))
+    return false;
+
+  // Pattern matched and profitability checked. Check dominance to determine the
+  // insertion point for an intrinsic that replaces the subtract and compare.
+  DominatorTree DT(*Sub->getFunction());
+  bool SubDominates = DT.dominates(Sub, Cmp);
+  if (!SubDominates && !DT.dominates(Cmp, Sub))
+    return false;
+  Instruction *InPt = SubDominates ? cast<Instruction>(Sub)
+                                   : cast<Instruction>(Cmp);
+  replaceMathCmpWithIntrinsic(Sub, Cmp, InPt, Intrinsic::usub_with_overflow);
+  // Reset callers - do not crash by iterating over a dead instruction.
+  ModifiedDT = true;
+  return true;
+}
+
 /// Sink the given CmpInst into user blocks to reduce the number of virtual
 /// registers that must be created and coalesced. This is a clear win except on
 /// targets with multiple condition code registers (PowerPC), where it might
@@ -1276,14 +1338,17 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
   return MadeChange;
 }
 
-static bool optimizeCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
-                                  const DataLayout &DL) {
+static bool optimizeCmp(CmpInst *Cmp, const TargetLowering &TLI,
+                        const DataLayout &DL, bool &ModifiedDT) {
   if (sinkCmpExpression(Cmp, TLI))
     return true;
 
   if (combineToUAddWithOverflow(Cmp, TLI, DL))
     return true;
 
+  if (combineToUSubWithOverflow(Cmp, TLI, DL, ModifiedDT))
+    return true;
+
   return false;
 }
 
@@ -6770,8 +6835,8 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
     return false;
   }
 
-  if (CmpInst *CI = dyn_cast<CmpInst>(I))
-    if (TLI && optimizeCmpExpression(CI, *TLI, *DL))
+  if (auto *Cmp = dyn_cast<CmpInst>(I))
+    if (TLI && optimizeCmp(Cmp, *TLI, *DL, ModifiedDT))
       return true;
 
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
```
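
One subtlety worth calling out: instcombine canonicalizes a subtract of a constant into an add of the negated constant, so the matcher also accepts (add X, -C) paired with (icmp ult X, C), and replaceMathCmpWithIntrinsic negates the constant back when forming the intrinsic. A hedged sketch of that shape (hypothetical function and value names, same opted-in target assumption as above):

```llvm
; Canonical IR for "subtract 42 then check for borrow": the sub has become
; an add of -42, while the compare still uses the positive constant.
define i1 @usubo_ult_constant_sketch(i64 %x, i64* %p) {
  %sub = add i64 %x, -42
  store i64 %sub, i64* %p
  %ov = icmp ult i64 %x, 42
  ret i1 %ov
}

; combineToUSubWithOverflow matches the pair because -(-42) == 42, and the
; rewrite uses the re-negated constant as the second intrinsic operand:
;   %t   = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x, i64 42)
;   %sub = extractvalue { i64, i1 } %t, 0
;   %ov  = extractvalue { i64, i1 } %t, 1
```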

