-rw-r--r--   llvm/include/llvm/Transforms/Scalar/Reassociate.h     1
-rw-r--r--   llvm/lib/Transforms/Scalar/Reassociate.cpp           64
-rw-r--r--   llvm/test/Transforms/Reassociate/matching-binops.ll  80
3 files changed, 110 insertions, 35 deletions
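
This patch teaches the Reassociate pass to reorder the operands of an associative binop pair so that two matching inner binops become adjacent, creating a factoring opportunity for later passes. A minimal IR sketch of the intended rewrite (the function @sketch is hypothetical, mirroring the tests below):

; Before: the two shifts by %s are separated by %z and cannot be factored.
define i8 @sketch(i8 %x, i8 %y, i8 %z, i8 %s) {
  %sx = shl i8 %x, %s
  %sy = shl i8 %y, %s
  %a = and i8 %sx, %z           ; (x << s) & z
  %r = and i8 %a, %sy           ; ((x << s) & z) & (y << s)
  ret i8 %r
}

; After -reassociate with this patch, the matching shifts are paired:
;   %a = and i8 %sx, %sy        ; (x << s) & (y << s)
;   %r = and i8 %a, %z
; which InstCombine could then factor to ((x & y) << s) & z.
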
diff --git a/llvm/include/llvm/Transforms/Scalar/Reassociate.h b/llvm/include/llvm/Transforms/Scalar/Reassociate.h
index ba7586dffd9..17e544c6786 100644
--- a/llvm/include/llvm/Transforms/Scalar/Reassociate.h
+++ b/llvm/include/llvm/Transforms/Scalar/Reassociate.h
@@ -118,6 +118,7 @@ private:
void OptimizeInst(Instruction *I);
Instruction *canonicalizeNegConstExpr(Instruction *I);
void BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT);
+ void swapOperandsToMatchBinops(BinaryOperator &B);
};
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 03cd7c10150..79ab44ff167 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -63,6 +63,7 @@
using namespace llvm;
using namespace reassociate;
+using namespace PatternMatch;
#define DEBUG_TYPE "reassociate"
@@ -2131,6 +2132,66 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
ReassociateExpression(BO);
}
+/// If we have an associative pair of binops with the same opcode and 2 of the 3
+/// operands to that pair of binops are some other matching binop, rearrange the
+/// operands of the associative binops so the matching ops are paired together.
+/// This transform creates factoring opportunities by pairing opcodes.
+/// TODO: Should those factoring optimizations be handled here or in InstCombine?
+/// Example:
+/// ((X << S) & Y) & (Z << S) --> ((X << S) & (Z << S)) & Y (reassociation)
+/// --> ((X & Z) << S) & Y (factorize the shift out of the 'and' ops)
+void ReassociatePass::swapOperandsToMatchBinops(BinaryOperator &B) {
+ BinaryOperator *B0, *B1;
+ if (!B.isAssociative() || !B.isCommutative() ||
+ !match(&B, m_BinOp(m_BinOp(B0), m_BinOp(B1))))
+ return;
+
+ // We have (B0 op B1) where both operands are also binops.
+ // Canonicalize a binop with the same opcode as the parent binop (B) to B0 and
+ // a binop with a different opcode to B1.
+ Instruction::BinaryOps TopOpc = B.getOpcode();
+ if (B0->getOpcode() != TopOpc)
+ std::swap(B0, B1);
+
+ // If (1) we don't have a pair of binops with the same opcode or (2) B0 and B1
+ // already have the same opcode, there is nothing to do. If the binop with the
+ // same opcode (B0) has more than one use, reassociation would result in more
+ // instructions, so bail out.
+ Instruction::BinaryOps OtherOpc = B1->getOpcode();
+ if (B0->getOpcode() != TopOpc || !B0->hasOneUse() || OtherOpc == TopOpc)
+ return;
+
+ // Canonicalize a binop that matches B1 to V00 (operand 0 of B0) and a value
+ // that does not match B1 to V01.
+ Value *V00 = B0->getOperand(0), *V01 = B0->getOperand(1);
+ if (!match(V00, m_BinOp()) ||
+ cast<BinaryOperator>(V00)->getOpcode() != OtherOpc)
+ std::swap(V00, V01);
+
+ // We need a binop with B1's opcode (OtherOpc) in V00 and a value without
+ // that opcode in V01; if both operands already match B1, don't reorder.
+ BinaryOperator *B00, *B01;
+ if (!match(V00, m_BinOp(B00)) || B00->getOpcode() != OtherOpc ||
+ (match(V01, m_BinOp(B01)) && B01->getOpcode() == OtherOpc))
+ return;
+
+ // B00 and B1 are displaced matching binops, so pull them together:
+ // (B00 op V01) op B1 --> (B00 op B1) op V01
+ // The builder is positioned at B and inherits B's debug location, so the
+ // new binops are inserted before B with that location.
+ IRBuilder<> Builder(&B);
+ Value *NewBO1 = Builder.CreateBinOp(TopOpc, B00, B1);
+ Value *NewBO2 = Builder.CreateBinOp(TopOpc, NewBO1, V01);
+
+ // Propagate fast-math-flags and 'exact' from B, but drop the wrapping flags
+ // (nuw/nsw): they are not guaranteed to hold for the reassociated values.
+ if (auto *I1 = dyn_cast<Instruction>(NewBO1))
+ I1->copyIRFlags(&B, false);
+ if (auto *I2 = dyn_cast<Instruction>(NewBO2))
+ I2->copyIRFlags(&B, false);
+
+ B.replaceAllUsesWith(NewBO2);
+}
+
void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
// First, walk the expression tree, linearizing the tree, collecting the
// operand information.
@@ -2250,6 +2311,9 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
// Now that we ordered and optimized the expressions, splat them back into
// the expression tree, removing any unneeded nodes.
RewriteExprTree(I, Ops);
+
+ // Try a final reassociation of the root of the tree.
+ swapOperandsToMatchBinops(*I);
}
void
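
The one-use check above is the key cost guard: if the inner binop that shares the parent's opcode (B0) has other users, it must stay alive anyway, so the rewrite would only add instructions. A hypothetical IR case the new code deliberately skips (@skipped and @use8 are illustrative names):

declare void @use8(i8)

define i8 @skipped(i8 %x, i8 %y, i8 %z, i8 %s) {
  %sx = shl i8 %x, %s
  %sy = shl i8 %y, %s
  %a = and i8 %sx, %z
  call void @use8(i8 %a)        ; extra use of %a (B0) blocks the transform
  %r = and i8 %a, %sy
  ret i8 %r
}

Extra uses of the leaf values (%sx, %sy) or of the root (%r) are fine, as the updated @add_lshr test below demonstrates.
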
diff --git a/llvm/test/Transforms/Reassociate/matching-binops.ll b/llvm/test/Transforms/Reassociate/matching-binops.ll
index 4771e3c8e1c..23cb79df21c 100644
--- a/llvm/test/Transforms/Reassociate/matching-binops.ll
+++ b/llvm/test/Transforms/Reassociate/matching-binops.ll
@@ -16,8 +16,8 @@ define i8 @and_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) {
; CHECK-LABEL: @and_shl(
; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = and i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = and i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], [[Z:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%sx = shl i8 %x, %shamt
@@ -31,8 +31,8 @@ define i8 @or_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) {
; CHECK-LABEL: @or_shl(
; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[Z:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%sx = shl i8 %x, %shamt
@@ -46,8 +46,8 @@ define i8 @xor_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) {
; CHECK-LABEL: @xor_shl(
; CHECK-NEXT: [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
; CHECK-NEXT: [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[Z:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%sx = shl i8 %x, %shamt
@@ -61,8 +61,8 @@ define i8 @and_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) {
; CHECK-LABEL: @and_lshr(
; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = and i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = and i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], [[Z:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%sx = lshr i8 %x, %shamt
@@ -76,8 +76,8 @@ define i8 @or_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) {
; CHECK-LABEL: @or_lshr(
; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[Z:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%sx = lshr i8 %x, %shamt
@@ -91,8 +91,8 @@ define i8 @xor_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) {
; CHECK-LABEL: @xor_lshr(
; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = xor i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[A]], [[Z:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%sx = lshr i8 %x, %shamt
@@ -106,8 +106,8 @@ define i8 @and_ashr(i8 %x, i8 %y, i8 %z, i8 %shamt) {
; CHECK-LABEL: @and_ashr(
; CHECK-NEXT: [[SX:%.*]] = ashr i8 [[X:%.*]], [[SHAMT:%.*]]
; CHECK-NEXT: [[SY:%.*]] = ashr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = and i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = and i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], [[Z:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%sx = ashr i8 %x, %shamt
@@ -121,8 +121,8 @@ define i8 @or_ashr(i8 %x, i8 %y, i8 %z, i8 %shamt) {
; CHECK-LABEL: @or_ashr(
; CHECK-NEXT: [[SX:%.*]] = ashr i8 [[X:%.*]], [[SHAMT:%.*]]
; CHECK-NEXT: [[SY:%.*]] = ashr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = or i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = or i8 [[A]], [[Z:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%sx = ashr i8 %x, %shamt
@@ -138,8 +138,8 @@ define <2 x i8> @xor_ashr(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z, <2 x i8> %shamt
; CHECK-LABEL: @xor_ashr(
; CHECK-NEXT: [[SX:%.*]] = ashr <2 x i8> [[X:%.*]], [[SHAMT:%.*]]
; CHECK-NEXT: [[SY:%.*]] = ashr <2 x i8> [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[A:%.*]] = xor <2 x i8> [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = xor <2 x i8> [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = xor <2 x i8> [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = xor <2 x i8> [[A]], [[Z:%.*]]
; CHECK-NEXT: ret <2 x i8> [[R]]
;
%sx = ashr <2 x i8> %x, %shamt
@@ -203,19 +203,29 @@ define i8 @xor_lshr_multiuse(i8 %x, i8 %y, i8 %z, i8 %shamt) {
}
; Math ops work too. Change instruction positions too to verify placement.
+; We only care about extra uses of the first associative value - in this
+; case, it's %a. Everything else can have extra uses.
+
+declare void @use(i8)
define i8 @add_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) {
; CHECK-LABEL: @add_lshr(
; CHECK-NEXT: [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
-; CHECK-NEXT: [[A:%.*]] = add i8 [[SX]], [[Z:%.*]]
; CHECK-NEXT: [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT: [[R:%.*]] = add i8 [[A]], [[SY]]
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT: call void @use(i8 [[SX]])
+; CHECK-NEXT: call void @use(i8 [[SY]])
+; CHECK-NEXT: call void @use(i8 [[TMP2]])
+; CHECK-NEXT: ret i8 [[TMP2]]
;
%sx = lshr i8 %x, %shamt
%a = add i8 %sx, %z
%sy = lshr i8 %y, %shamt
%r = add i8 %a, %sy
+ call void @use(i8 %sx)
+ call void @use(i8 %sy)
+ call void @use(i8 %r)
ret i8 %r
}
@@ -225,8 +235,8 @@ define i8 @mul_sub(i8 %x, i8 %y, i8 %z, i8 %m) {
; CHECK-LABEL: @mul_sub(
; CHECK-NEXT: [[SX:%.*]] = sub i8 [[X:%.*]], [[M:%.*]]
; CHECK-NEXT: [[SY:%.*]] = sub i8 [[Y:%.*]], [[M]]
-; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = mul nuw i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = mul i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = mul i8 [[A]], [[Z:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%sx = sub i8 %x, %m
@@ -239,9 +249,9 @@ define i8 @mul_sub(i8 %x, i8 %y, i8 %z, i8 %m) {
define i8 @add_mul(i8 %x, i8 %y, i8 %z, i8 %m) {
; CHECK-LABEL: @add_mul(
; CHECK-NEXT: [[SX:%.*]] = mul nuw i8 [[X:%.*]], 42
-; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[Z:%.*]], [[SX]]
; CHECK-NEXT: [[SY:%.*]] = mul nsw i8 [[M:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[R:%.*]] = add nsw i8 [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = add i8 [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = add i8 [[A]], [[Z:%.*]]
; CHECK-NEXT: ret i8 [[R]]
;
%sx = mul nuw i8 %x, 42
@@ -257,9 +267,9 @@ define i8 @add_mul(i8 %x, i8 %y, i8 %z, i8 %m) {
define float @fadd_fmul(float %x, float %y, float %z, float %m) {
; CHECK-LABEL: @fadd_fmul(
; CHECK-NEXT: [[SX:%.*]] = fmul float [[X:%.*]], [[M:%.*]]
-; CHECK-NEXT: [[A:%.*]] = fadd fast float [[SX]], [[Z:%.*]]
; CHECK-NEXT: [[SY:%.*]] = fmul float [[Y:%.*]], [[M]]
-; CHECK-NEXT: [[R:%.*]] = fadd fast float [[A]], [[SY]]
+; CHECK-NEXT: [[A:%.*]] = fadd fast float [[SX]], [[SY]]
+; CHECK-NEXT: [[R:%.*]] = fadd fast float [[A]], [[Z:%.*]]
; CHECK-NEXT: ret float [[R]]
;
%sx = fmul float %x, %m
@@ -273,8 +283,8 @@ define float @fmul_fdiv(float %x, float %y, float %z, float %m) {
; CHECK-LABEL: @fmul_fdiv(
; CHECK-NEXT: [[SX:%.*]] = fdiv float [[X:%.*]], [[M:%.*]]
; CHECK-NEXT: [[SY:%.*]] = fdiv float [[Y:%.*]], 4.200000e+01
-; CHECK-NEXT: [[A:%.*]] = fmul fast float [[SY]], [[Z:%.*]]
-; CHECK-NEXT: [[R:%.*]] = fmul fast float [[A]], [[SX]]
+; CHECK-NEXT: [[A:%.*]] = fmul fast float [[SY]], [[SX]]
+; CHECK-NEXT: [[R:%.*]] = fmul fast float [[A]], [[Z:%.*]]
; CHECK-NEXT: ret float [[R]]
;
%sx = fdiv float %x, %m
@@ -284,7 +294,7 @@ define float @fmul_fdiv(float %x, float %y, float %z, float %m) {
ret float %r
}
-; Verify that debug info for modified instructions gets discarded (references become undef).
+; Verify that debug info for modified instructions is not invalid.
define i32 @and_shl_dbg(i32 %x, i32 %y, i32 %z, i32 %shamt) {
; CHECK-LABEL: @and_shl_dbg(
@@ -296,11 +306,11 @@ define i32 @and_shl_dbg(i32 %x, i32 %y, i32 %z, i32 %shamt) {
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[SHL]], metadata !16, metadata !DIExpression()), !dbg !25
; CHECK-NEXT: [[SHL1:%.*]] = shl i32 [[Y]], [[SHAMT]], !dbg !26
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[SHL1]], metadata !17, metadata !DIExpression()), !dbg !27
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[Z]], !dbg !28
-; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[AND]], metadata !18, metadata !DIExpression()), !dbg !29
-; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND]], [[SHL1]], !dbg !30
-; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[AND2]], metadata !19, metadata !DIExpression()), !dbg !31
-; CHECK-NEXT: ret i32 [[AND2]], !dbg !32
+; CHECK-NEXT: call void @llvm.dbg.value(metadata !2, metadata !18, metadata !DIExpression()), !dbg !28
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SHL]], [[SHL1]], !dbg !29
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[Z]], !dbg !29
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP2]], metadata !19, metadata !DIExpression()), !dbg !30
+; CHECK-NEXT: ret i32 [[TMP2]], !dbg !31
;
call void @llvm.dbg.value(metadata i32 %x, metadata !13, metadata !DIExpression()), !dbg !21
call void @llvm.dbg.value(metadata i32 %y, metadata !14, metadata !DIExpression()), !dbg !22
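
The @mul_sub and @fadd_fmul checks above also pin down the flag policy: fast-math flags are propagated to the rebuilt binops, while nuw/nsw are dropped since they may no longer hold after reassociation. A reduced, hypothetical reproducer (the RUN line is assumed to be this file's usual one):

; RUN: opt < %s -reassociate -S | FileCheck %s   <- assumed invocation
define i8 @drop_wrap_flags(i8 %x, i8 %y, i8 %z, i8 %m) {
  %sx = sub i8 %x, %m
  %sy = sub i8 %y, %m
  %a = mul nsw i8 %sx, %z
  %r = mul nuw i8 %a, %sy
  ; Expected after -reassociate: the muls are paired and the wrap flags gone:
  ;   %a = mul i8 %sx, %sy
  ;   %r = mul i8 %a, %z
  ret i8 %r
}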