author     Sanjay Patel <spatel@rotateright.com>   2019-02-18 23:33:05 +0000
committer  Sanjay Patel <spatel@rotateright.com>   2019-02-18 23:33:05 +0000
commit     d8b4efcb6b4a3408da72e45b4e4225b751065ac9 (patch)
tree       8a8f3bf8d8c89f055fa94b81f3be94b8c64c7dad /llvm/lib/CodeGen
parent     05709acba49a0ead78f33487ef377ff2ef376496 (diff)
download   bcm5719-llvm-d8b4efcb6b4a3408da72e45b4e4225b751065ac9.tar.gz
           bcm5719-llvm-d8b4efcb6b4a3408da72e45b4e4225b751065ac9.zip
[CGP] form usub with overflow from sub+icmp
The motivating x86 cases for forming the intrinsic are shown in PR31754 and PR40487:
https://bugs.llvm.org/show_bug.cgi?id=31754
https://bugs.llvm.org/show_bug.cgi?id=40487
...and those are covered in the IR test file and the x86 codegen file.

Matching the usubo pattern is harder than uaddo because we have 2 independent values rather than a def-use chain.

This adds a TLI hook that should preserve the existing behavior for uaddo formation, but disables usubo formation by default. Only x86 overrides that setting for now, although other targets will likely benefit from forming usubo too.

Differential Revision: https://reviews.llvm.org/D57789

llvm-svn: 354298
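For illustration, here is a minimal before/after IR sketch of the basic pattern this patch matches (the function names and the select consumer are hypothetical, chosen for this example; the real motivating cases are in the commit's test files). The "math" and "ov" value names mirror the ones created by replaceMathCmpWithIntrinsic in the diff below; a second sketch of the canonicalized-add form follows the diff.

  ; Before CodeGenPrepare: the subtract and the overflow check are two
  ; independent instructions that merely share the operands %a and %b.
  define i32 @usubo_before(i32 %a, i32 %b) {
    %sub = sub i32 %a, %b
    %cmp = icmp ult i32 %a, %b        ; true iff %a - %b wraps (unsigned)
    %sel = select i1 %cmp, i32 0, i32 %sub
    ret i32 %sel
  }

  ; After the transform (on targets whose shouldFormOverflowOp hook
  ; returns true for ISD::USUBO), one intrinsic covers both values:
  define i32 @usubo_after(i32 %a, i32 %b) {
    %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
    %math = extractvalue { i32, i1 } %usub, 0
    %ov = extractvalue { i32, i1 } %usub, 1
    %sel = select i1 %ov, i32 0, i32 %math
    ret i32 %sel
  }

  declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32)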
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp | 91
 1 file changed, 78 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 14f56279e85..20a2ae1233c 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1162,9 +1162,18 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
 static void replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
                                         Instruction *InsertPt,
                                         Intrinsic::ID IID) {
+  Value *Arg0 = BO->getOperand(0);
+  Value *Arg1 = BO->getOperand(1);
+
+  // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
+  if (BO->getOpcode() == Instruction::Add &&
+      IID == Intrinsic::usub_with_overflow) {
+    assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
+    Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
+  }
+
   IRBuilder<> Builder(InsertPt);
-  Value *MathOV = Builder.CreateBinaryIntrinsic(IID, BO->getOperand(0),
-                                                BO->getOperand(1));
+  Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
   Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
   Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
   BO->replaceAllUsesWith(Math);
@@ -1182,13 +1191,8 @@ static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI,
   if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
     return false;
-  // Allow the transform as long as we have an integer type that is not
-  // obviously illegal and unsupported.
-  Type *Ty = Add->getType();
-  if (!isa<IntegerType>(Ty))
-    return false;
-  EVT CodegenVT = TLI.getValueType(DL, Ty);
-  if (!CodegenVT.isSimple() && TLI.isOperationExpand(ISD::UADDO, CodegenVT))
+  if (!TLI.shouldFormOverflowOp(ISD::UADDO,
+                                TLI.getValueType(DL, Add->getType())))
     return false;
   // We don't want to move around uses of condition values this late, so we
@@ -1210,6 +1214,64 @@ static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI,
   return true;
 }
+static bool combineToUSubWithOverflow(CmpInst *Cmp, const TargetLowering &TLI,
+                                      const DataLayout &DL, bool &ModifiedDT) {
+  // Convert (A u> B) to (B u< A) to simplify pattern matching.
+  Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
+  ICmpInst::Predicate Pred = Cmp->getPredicate();
+  if (Pred == ICmpInst::ICMP_UGT) {
+    std::swap(A, B);
+    Pred = ICmpInst::ICMP_ULT;
+  }
+  // Convert special-case: (A == 0) is the same as (A u< 1).
+  if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
+    B = ConstantInt::get(B->getType(), 1);
+    Pred = ICmpInst::ICMP_ULT;
+  }
+  if (Pred != ICmpInst::ICMP_ULT)
+    return false;
+
+  // Walk the users of a variable operand of a compare looking for a subtract or
+  // add with that same operand. Also match the 2nd operand of the compare to
+  // the add/sub, but that may be a negated constant operand of an add.
+  Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
+  BinaryOperator *Sub = nullptr;
+  for (User *U : CmpVariableOperand->users()) {
+    // A - B, A u< B --> usubo(A, B)
+    if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
+      Sub = cast<BinaryOperator>(U);
+      break;
+    }
+
+    // A + (-C), A u< C (canonicalized form of (sub A, C))
+    const APInt *CmpC, *AddC;
+    if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
+        match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
+      Sub = cast<BinaryOperator>(U);
+      break;
+    }
+  }
+  if (!Sub)
+    return false;
+
+  if (!TLI.shouldFormOverflowOp(ISD::USUBO,
+                                TLI.getValueType(DL, Sub->getType())))
+    return false;
+
+  // Pattern matched and profitability checked. Check dominance to determine the
+  // insertion point for an intrinsic that replaces the subtract and compare.
+  DominatorTree DT(*Sub->getFunction());
+  bool SubDominates = DT.dominates(Sub, Cmp);
+  if (!SubDominates && !DT.dominates(Cmp, Sub))
+    return false;
+  Instruction *InPt = SubDominates ? cast<Instruction>(Sub)
+                                   : cast<Instruction>(Cmp);
+  replaceMathCmpWithIntrinsic(Sub, Cmp, InPt, Intrinsic::usub_with_overflow);
+  // Reset callers - do not crash by iterating over a dead instruction.
+  ModifiedDT = true;
+  return true;
+}
+
 /// Sink the given CmpInst into user blocks to reduce the number of virtual
 /// registers that must be created and coalesced. This is a clear win except on
 /// targets with multiple condition code registers (PowerPC), where it might
@@ -1276,14 +1338,17 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
   return MadeChange;
 }
-static bool optimizeCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
-                                  const DataLayout &DL) {
+static bool optimizeCmp(CmpInst *Cmp, const TargetLowering &TLI,
+                        const DataLayout &DL, bool &ModifiedDT) {
   if (sinkCmpExpression(Cmp, TLI))
     return true;
   if (combineToUAddWithOverflow(Cmp, TLI, DL))
     return true;
+  if (combineToUSubWithOverflow(Cmp, TLI, DL, ModifiedDT))
+    return true;
+
   return false;
 }
@@ -6770,8 +6835,8 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
     return false;
   }
-  if (CmpInst *CI = dyn_cast<CmpInst>(I))
-    if (TLI && optimizeCmpExpression(CI, *TLI, *DL))
+  if (auto *Cmp = dyn_cast<CmpInst>(I))
+    if (TLI && optimizeCmp(Cmp, *TLI, *DL, ModifiedDT))
       return true;
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
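For reference, a small IR sketch of the second matched form, where InstCombine has already canonicalized (sub %x, 42) to (add %x, -42) and replaceMathCmpWithIntrinsic negates the add constant to rebuild the subtract. The constant, function names, and store consumer are illustrative only, not taken from the commit's tests.

  ; Before: canonical IR adds a negative constant; the compare still
  ; uses the positive constant, so *AddC == -(*CmpC) in the matcher.
  define i1 @usubo_add_const(i32 %x, i32* %p) {
    %a = add i32 %x, -42              ; canonicalized (sub %x, 42)
    store i32 %a, i32* %p
    %ov = icmp ult i32 %x, 42         ; %x u< 42 <=> (%x - 42) wraps
    ret i1 %ov
  }

  ; After: Arg1 is negated (-(-42) == 42), so the intrinsic performs
  ; the original subtract and yields the same overflow bit.
  define i1 @usubo_add_const_after(i32 %x, i32* %p) {
    %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 42)
    %math = extractvalue { i32, i1 } %usub, 0
    store i32 %math, i32* %p
    %ov = extractvalue { i32, i1 } %usub, 1
    ret i1 %ov
  }

  declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32)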