diff options
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 52 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/and-fcmp.ll | 24 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/or-fcmp.ll | 24 |
3 files changed, 68 insertions, 32 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index aaa883a7037..99f17312bb4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1258,6 +1258,52 @@ Value *InstCombiner::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) return nullptr; } +/// This is a limited reassociation for a special case (see above) where we are +/// checking if two values are either both NAN (unordered) or not-NAN (ordered). +/// This could be handled more generally in '-reassociation', but it seems like +/// an unlikely pattern for a large number of logic ops and fcmps. +static Instruction *reassociateFCmps(BinaryOperator &BO, + InstCombiner::BuilderTy &Builder) { + Instruction::BinaryOps Opcode = BO.getOpcode(); + assert((Opcode == Instruction::And || Opcode == Instruction::Or) && + "Expecting and/or op for fcmp transform"); + + // There are 4 commuted variants of the pattern. Canonicalize operands of this + // logic op so an fcmp is operand 0 and a matching logic op is operand 1. + Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1), *X; + FCmpInst::Predicate Pred; + if (match(Op1, m_FCmp(Pred, m_Value(), m_AnyZeroFP()))) + std::swap(Op0, Op1); + + // Match inner binop and the predicate for combining 2 NAN checks into 1. + BinaryOperator *BO1; + FCmpInst::Predicate NanPred = Opcode == Instruction::And ? FCmpInst::FCMP_ORD + : FCmpInst::FCMP_UNO; + if (!match(Op0, m_FCmp(Pred, m_Value(X), m_AnyZeroFP())) || Pred != NanPred || + !match(Op1, m_BinOp(BO1)) || BO1->getOpcode() != Opcode) + return nullptr; + + // The inner logic op must have a matching fcmp operand. 
+ Value *BO10 = BO1->getOperand(0), *BO11 = BO1->getOperand(1), *Y; + if (!match(BO10, m_FCmp(Pred, m_Value(Y), m_AnyZeroFP())) || + Pred != NanPred || X->getType() != Y->getType()) + std::swap(BO10, BO11); + + if (!match(BO10, m_FCmp(Pred, m_Value(Y), m_AnyZeroFP())) || + Pred != NanPred || X->getType() != Y->getType()) + return nullptr; + + // and (fcmp ord X, 0), (and (fcmp ord Y, 0), Z) --> and (fcmp ord X, Y), Z + // or (fcmp uno X, 0), (or (fcmp uno Y, 0), Z) --> or (fcmp uno X, Y), Z + Value *NewFCmp = Builder.CreateFCmp(Pred, X, Y); + if (auto *NewFCmpInst = dyn_cast<FCmpInst>(NewFCmp)) { + // Intersect FMF from the 2 source fcmps. + NewFCmpInst->copyIRFlags(Op0); + NewFCmpInst->andIRFlags(BO10); + } + return BinaryOperator::Create(Opcode, NewFCmp, BO11); +} + /// Match De Morgan's Laws: /// (~A & ~B) == (~(A | B)) /// (~A | ~B) == (~(A & B)) @@ -1746,6 +1792,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Value *Res = foldLogicOfFCmps(LHS, RHS, true)) return replaceInstUsesWith(I, Res); + if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder)) + return FoldedFCmps; + if (Instruction *CastedAnd = foldCastedBitwiseLogic(I)) return CastedAnd; @@ -2415,6 +2464,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Value *Res = foldLogicOfFCmps(LHS, RHS, false)) return replaceInstUsesWith(I, Res); + if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder)) + return FoldedFCmps; + if (Instruction *CastedOr = foldCastedBitwiseLogic(I)) return CastedOr; diff --git a/llvm/test/Transforms/InstCombine/and-fcmp.ll b/llvm/test/Transforms/InstCombine/and-fcmp.ll index 5b2d02c9f34..dd51c6548ee 100644 --- a/llvm/test/Transforms/InstCombine/and-fcmp.ll +++ b/llvm/test/Transforms/InstCombine/and-fcmp.ll @@ -25,10 +25,8 @@ define <2 x i1> @PR1738_vec_undef(<2 x double> %x, <2 x double> %y) { define i1 @PR41069(i1 %z, float %c, float %d) { ; CHECK-LABEL: @PR41069( -; CHECK-NEXT: [[ORD1:%.*]] = fcmp arcp ord float [[C:%.*]], 0.000000e+00 -; 
CHECK-NEXT: [[AND:%.*]] = and i1 [[ORD1]], [[Z:%.*]] -; CHECK-NEXT: [[ORD2:%.*]] = fcmp afn ord float [[D:%.*]], 0.000000e+00 -; CHECK-NEXT: [[R:%.*]] = and i1 [[AND]], [[ORD2]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord float [[D:%.*]], [[C:%.*]] +; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[Z:%.*]] ; CHECK-NEXT: ret i1 [[R]] ; %ord1 = fcmp arcp ord float %c, 0.0 @@ -40,10 +38,8 @@ define i1 @PR41069(i1 %z, float %c, float %d) { define i1 @PR41069_commute(i1 %z, float %c, float %d) { ; CHECK-LABEL: @PR41069_commute( -; CHECK-NEXT: [[ORD1:%.*]] = fcmp ninf ord float [[C:%.*]], 0.000000e+00 -; CHECK-NEXT: [[AND:%.*]] = and i1 [[ORD1]], [[Z:%.*]] -; CHECK-NEXT: [[ORD2:%.*]] = fcmp reassoc ninf ord float [[D:%.*]], 0.000000e+00 -; CHECK-NEXT: [[R:%.*]] = and i1 [[ORD2]], [[AND]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ninf ord float [[D:%.*]], [[C:%.*]] +; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[Z:%.*]] ; CHECK-NEXT: ret i1 [[R]] ; %ord1 = fcmp ninf ord float %c, 0.0 @@ -58,10 +54,8 @@ define i1 @PR41069_commute(i1 %z, float %c, float %d) { define <2 x i1> @PR41069_vec(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) { ; CHECK-LABEL: @PR41069_vec( ; CHECK-NEXT: [[ORD1:%.*]] = fcmp ord <2 x double> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[ORD2:%.*]] = fcmp ord <2 x double> [[C:%.*]], <double 0.000000e+00, double undef> -; CHECK-NEXT: [[AND:%.*]] = and <2 x i1> [[ORD1]], [[ORD2]] -; CHECK-NEXT: [[ORD3:%.*]] = fcmp ord <2 x double> [[D:%.*]], zeroinitializer -; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[AND]], [[ORD3]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord <2 x double> [[D:%.*]], [[C:%.*]] +; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[TMP1]], [[ORD1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %ord1 = fcmp ord <2 x double> %a, %b @@ -75,10 +69,8 @@ define <2 x i1> @PR41069_vec(<2 x double> %a, <2 x double> %b, <2 x double> %c, define <2 x i1> @PR41069_vec_commute(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) { ; CHECK-LABEL: @PR41069_vec_commute( ; 
CHECK-NEXT: [[ORD1:%.*]] = fcmp ord <2 x double> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[ORD2:%.*]] = fcmp ord <2 x double> [[C:%.*]], <double 0.000000e+00, double undef> -; CHECK-NEXT: [[AND:%.*]] = and <2 x i1> [[ORD1]], [[ORD2]] -; CHECK-NEXT: [[ORD3:%.*]] = fcmp ord <2 x double> [[D:%.*]], zeroinitializer -; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[ORD3]], [[AND]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord <2 x double> [[D:%.*]], [[C:%.*]] +; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[TMP1]], [[ORD1]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %ord1 = fcmp ord <2 x double> %a, %b diff --git a/llvm/test/Transforms/InstCombine/or-fcmp.ll b/llvm/test/Transforms/InstCombine/or-fcmp.ll index eee31d5a7d5..10ac51ae32b 100644 --- a/llvm/test/Transforms/InstCombine/or-fcmp.ll +++ b/llvm/test/Transforms/InstCombine/or-fcmp.ll @@ -26,10 +26,8 @@ define <2 x i1> @PR1738_vec_undef(<2 x double> %x, <2 x double> %y) { define i1 @PR41069(double %a, double %b, double %c, double %d) { ; CHECK-LABEL: @PR41069( ; CHECK-NEXT: [[UNO1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[UNO2:%.*]] = fcmp uno double [[C:%.*]], 0.000000e+00 -; CHECK-NEXT: [[OR:%.*]] = or i1 [[UNO1]], [[UNO2]] -; CHECK-NEXT: [[UNO3:%.*]] = fcmp uno double [[D:%.*]], 0.000000e+00 -; CHECK-NEXT: [[R:%.*]] = or i1 [[OR]], [[UNO3]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double [[D:%.*]], [[C:%.*]] +; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP1]], [[UNO1]] ; CHECK-NEXT: ret i1 [[R]] ; %uno1 = fcmp uno double %a, %b @@ -43,10 +41,8 @@ define i1 @PR41069(double %a, double %b, double %c, double %d) { define i1 @PR41069_commute(double %a, double %b, double %c, double %d) { ; CHECK-LABEL: @PR41069_commute( ; CHECK-NEXT: [[UNO1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[UNO2:%.*]] = fcmp uno double [[C:%.*]], 0.000000e+00 -; CHECK-NEXT: [[OR:%.*]] = or i1 [[UNO1]], [[UNO2]] -; CHECK-NEXT: [[UNO3:%.*]] = fcmp uno double [[D:%.*]], 0.000000e+00 -; CHECK-NEXT: [[R:%.*]] = or i1 [[UNO3]], [[OR]] +; CHECK-NEXT: 
[[TMP1:%.*]] = fcmp uno double [[D:%.*]], [[C:%.*]] +; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP1]], [[UNO1]] ; CHECK-NEXT: ret i1 [[R]] ; %uno1 = fcmp uno double %a, %b @@ -59,10 +55,8 @@ define i1 @PR41069_commute(double %a, double %b, double %c, double %d) { define <2 x i1> @PR41069_vec(<2 x i1> %z, <2 x float> %c, <2 x float> %d) { ; CHECK-LABEL: @PR41069_vec( -; CHECK-NEXT: [[UNO1:%.*]] = fcmp uno <2 x float> [[C:%.*]], zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = or <2 x i1> [[UNO1]], [[Z:%.*]] -; CHECK-NEXT: [[UNO2:%.*]] = fcmp uno <2 x float> [[D:%.*]], <float 0.000000e+00, float undef> -; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[OR]], [[UNO2]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <2 x float> [[D:%.*]], [[C:%.*]] +; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[TMP1]], [[Z:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %uno1 = fcmp uno <2 x float> %c, zeroinitializer @@ -74,10 +68,8 @@ define <2 x i1> @PR41069_vec(<2 x i1> %z, <2 x float> %c, <2 x float> %d) { define <2 x i1> @PR41069_vec_commute(<2 x i1> %z, <2 x float> %c, <2 x float> %d) { ; CHECK-LABEL: @PR41069_vec_commute( -; CHECK-NEXT: [[UNO1:%.*]] = fcmp uno <2 x float> [[C:%.*]], zeroinitializer -; CHECK-NEXT: [[OR:%.*]] = or <2 x i1> [[UNO1]], [[Z:%.*]] -; CHECK-NEXT: [[UNO2:%.*]] = fcmp uno <2 x float> [[D:%.*]], <float 0.000000e+00, float undef> -; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[UNO2]], [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno <2 x float> [[D:%.*]], [[C:%.*]] +; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[TMP1]], [[Z:%.*]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; %uno1 = fcmp uno <2 x float> %c, zeroinitializer |

