diff options
-rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 116 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-jumptable.ll | 13 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/ifcvt-select.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll | 24 | ||||
-rw-r--r-- | llvm/test/MC/ARM/data-in-code.ll | 4 | ||||
-rw-r--r-- | llvm/test/Transforms/SimplifyCFG/sink-common-code.ll | 110 |
7 files changed, 222 insertions, 49 deletions
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index a0d45413bd1..a9fa7e7ae9b 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1547,20 +1547,62 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { assert(BI1->isUnconditional()); BasicBlock *BBEnd = BI1->getSuccessor(0); - // We currently only support branch targets with two predecessors. - // FIXME: this is an arbitrary restriction and should be lifted. - SmallVector<BasicBlock*,4> Blocks; - for (auto *BB : predecessors(BBEnd)) - Blocks.push_back(BB); - if (Blocks.size() != 2 || - !all_of(Blocks, [](const BasicBlock *BB) { - auto *BI = dyn_cast<BranchInst>(BB->getTerminator()); - return BI && BI->isUnconditional(); - })) + // We support two situations: + // (1) all incoming arcs are unconditional + // (2) one incoming arc is conditional + // + // (2) is very common in switch defaults and + // else-if patterns; + // + // if (a) f(1); + // else if (b) f(2); + // + // produces: + // + // [if] + // / \ + // [f(1)] [if] + // | | \ + // | | \ + // | [f(2)]| + // \ | / + // [ end ] + // + // [end] has two unconditional predecessor arcs and one conditional. The + // conditional refers to the implicit empty 'else' arc. This conditional + // arc can also be caused by an empty default block in a switch. + // + // In this case, we attempt to sink code from all *unconditional* arcs. + // If we can sink instructions from these arcs (determined during the scan + // phase below) we insert a common successor for all unconditional arcs and + // connect that to [end], to enable sinking: + // + // [if] + // / \ + // [x(1)] [if] + // | | \ + // | | \ + // | [x(2)] | + // \ / | + // [sink.split] | + // \ / + // [ end ] + // + SmallVector<BasicBlock*,4> UnconditionalPreds; + Instruction *Cond = nullptr; + for (auto *B : predecessors(BBEnd)) { + auto *T = B->getTerminator(); + if (isa<BranchInst>(T) && cast<BranchInst>(T)->isUnconditional()) + UnconditionalPreds.push_back(B); + else if ((isa<BranchInst>(T) || isa<SwitchInst>(T)) && !Cond) + Cond = T; + else + return false; + } + if (UnconditionalPreds.size() < 2) return false; - + bool Changed = false; - // We take a two-step approach to tail sinking. First we scan from the end of // each block upwards in lockstep. If the n'th instruction from the end of each // block can be sunk, those instructions are added to ValuesToSink and we @@ -1570,7 +1612,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { unsigned ScanIdx = 0; SmallPtrSet<Value*,4> InstructionsToSink; DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands; - LockstepReverseIterator LRI(Blocks); + LockstepReverseIterator LRI(UnconditionalPreds); while (LRI.isValid() && canSinkInstructions(*LRI, PHIOperands)) { DEBUG(dbgs() << "SINK: instruction can be sunk: " << (*LRI)[0] << "\n"); @@ -1579,6 +1621,35 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { --LRI; } + auto ProfitableToSinkLastInstruction = [&]() { + LRI.reset(); + unsigned NumPHIdValues = 0; + for (auto *I : *LRI) + for (auto *V : PHIOperands[I]) + if (InstructionsToSink.count(V) == 0) + ++NumPHIdValues; + DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n"); + assert((NumPHIdValues % UnconditionalPreds.size() == 0) && + "Every operand must either be PHId or not PHId!"); + + return NumPHIdValues / UnconditionalPreds.size() <= 1; + }; + + if (ScanIdx > 0 && Cond) { + // Check if we would actually sink anything first! + if (!ProfitableToSinkLastInstruction()) + return false; + + DEBUG(dbgs() << "SINK: Splitting edge\n"); + // We have a conditional edge and we're going to sink some instructions. + // Insert a new block postdominating all blocks we're going to sink from. + if (!SplitBlockPredecessors(BI1->getSuccessor(0), UnconditionalPreds, + ".sink.split")) + // Edges couldn't be split. + return false; + Changed = true; + } + // Now that we've analyzed all potential sinking candidates, perform the // actual sink. We iteratively sink the last non-terminator of the source // blocks into their common successor unless doing so would require too @@ -1593,29 +1664,20 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // This is unlikely in practice though. for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) { DEBUG(dbgs() << "SINK: Sink: " - << *Blocks[0]->getTerminator()->getPrevNode() + << *UnconditionalPreds[0]->getTerminator()->getPrevNode() << "\n"); // Because we've sunk every instruction in turn, the current instruction to // sink is always at index 0. - LRI.reset(); - unsigned NumPHIdValues = 0; - for (auto *I : *LRI) - for (auto *V : PHIOperands[I]) - if (InstructionsToSink.count(V) == 0) - ++NumPHIdValues; - DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n"); - assert((NumPHIdValues % Blocks.size() == 0) && - "Every operand must either be PHId or not PHId!"); - - if (NumPHIdValues / Blocks.size() > 1) + if (!ProfitableToSinkLastInstruction()) { // Too many PHIs would be created. + DEBUG(dbgs() << "SINK: stopping here, too many PHIs would be created!\n"); break; + } - sinkLastInstruction(Blocks); + sinkLastInstruction(UnconditionalPreds); NumSinkCommons++; Changed = true; } - return Changed; } diff --git a/llvm/test/CodeGen/AArch64/arm64-jumptable.ll b/llvm/test/CodeGen/AArch64/arm64-jumptable.ll index 4635cfe5858..c7f213fa846 100644 --- a/llvm/test/CodeGen/AArch64/arm64-jumptable.ll +++ b/llvm/test/CodeGen/AArch64/arm64-jumptable.ll @@ -2,25 +2,26 @@ ; RUN: llc -mtriple=arm64-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-LINUX ; <rdar://11417675> -define void @sum(i32* %to) { +define void @sum(i32 %a, i32* %to, i32 %c) { entry: - switch i32 undef, label %exit [ + switch i32 %a, label %exit [ i32 1, label %bb1 i32 2, label %bb2 i32 3, label %bb3 i32 4, label %bb4 ] bb1: - store i32 undef, i32* %to + %b = add i32 %c, 1 + store i32 %b, i32* %to br label %exit bb2: - store i32 undef, i32* %to + store i32 2, i32* %to br label %exit bb3: - store i32 undef, i32* %to + store i32 3, i32* %to br label %exit bb4: - store i32 undef, i32* %to + store i32 4, i32* %to br label %exit exit: ret void diff --git a/llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll b/llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll index 12e6f2d4889..3ecb1d49ee1 100644 --- a/llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll +++ b/llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll @@ -3,7 +3,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; Function Attrs: norecurse nounwind define void @foo(i32 %a, i32 %b, float* nocapture %foo_arr) #0 { -; CHECK: (load 4 from %ir.arrayidx1.{{i[1-2]}}), (load 4 from %ir.arrayidx1.{{i[1-2]}}) +; CHECK: (load 4 from %ir.arrayidx1.{{i[1-2]}}) entry: %cmp = icmp sgt i32 %a, 0 br i1 %cmp, label %if.then, label %if.end diff --git a/llvm/test/CodeGen/AArch64/ifcvt-select.ll b/llvm/test/CodeGen/AArch64/ifcvt-select.ll index 4e024d963f2..b4815c4fb3e 100644 --- a/llvm/test/CodeGen/AArch64/ifcvt-select.ll +++ b/llvm/test/CodeGen/AArch64/ifcvt-select.ll @@ -4,7 +4,7 @@ define i32 @foo(i32 %a, i32 %b) { entry: ;CHECK-LABEL: foo: -;CHECK: csinc +;CHECK: cneg ;CHECK-NOT: csel %sub = sub nsw i32 %b, %a %cmp10 = icmp sgt i32 %a, 0 diff --git a/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll b/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll index f66af7fd627..22d0584f63b 100644 --- a/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll +++ b/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll @@ -11,9 +11,9 @@ define void @test_i16_2cmp_signed_1() { ; CHECK-LABEL: test_i16_2cmp_signed_1 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.gt +; CHECK-NEXT: b.lt ; CHECK-NOT: cmp -; CHECK: b.ne +; CHECK: ret entry: %0 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2 %1 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2 @@ -39,7 +39,7 @@ if.end8: ; preds = %if.else, %if.then7, define void @test_i16_2cmp_signed_2() { ; CHECK-LABEL: test_i16_2cmp_signed_2 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.le +; CHECK-NEXT: b.gt ; CHECK-NOT: cmp ; CHECK: b.ge entry: @@ -67,9 +67,9 @@ if.end8: ; preds = %if.else, %if.then7, define void @test_i16_2cmp_unsigned_1() { ; CHECK-LABEL: test_i16_2cmp_unsigned_1 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.hi +; CHECK-NEXT: b.lo ; CHECK-NOT: cmp -; CHECK: b.ne +; CHECK: ret entry: %0 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2 %1 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2 @@ -95,7 +95,7 @@ if.end8: ; preds = %if.else, %if.then7, define void @test_i16_2cmp_unsigned_2() { ; CHECK-LABEL: test_i16_2cmp_unsigned_2 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.ls +; CHECK-NEXT: b.hi ; CHECK-NOT: cmp ; CHECK: b.hs entry: @@ -132,9 +132,9 @@ if.end8: ; preds = %if.else, %if.then7, define void @test_i8_2cmp_signed_1() { ; CHECK-LABEL: test_i8_2cmp_signed_1 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.gt +; CHECK-NEXT: b.lt ; CHECK-NOT: cmp -; CHECK: b.ne +; CHECK: ret entry: %0 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2 %1 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2 @@ -160,7 +160,7 @@ if.end8: ; preds = %if.else, %if.then7, define void @test_i8_2cmp_signed_2() { ; CHECK-LABEL: test_i8_2cmp_signed_2 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.le +; CHECK-NEXT: b.gt ; CHECK-NOT: cmp ; CHECK: b.ge entry: @@ -188,9 +188,9 @@ if.end8: ; preds = %if.else, %if.then7, define void @test_i8_2cmp_unsigned_1() { ; CHECK-LABEL: test_i8_2cmp_unsigned_1 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.hi +; CHECK-NEXT: b.lo ; CHECK-NOT: cmp -; CHECK: b.ne +; CHECK: ret entry: %0 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2 %1 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2 @@ -216,7 +216,7 @@ if.end8: ; preds = %if.else, %if.then7, define void @test_i8_2cmp_unsigned_2() { ; CHECK-LABEL: test_i8_2cmp_unsigned_2 ; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} -; CHECK-NEXT: b.ls +; CHECK-NEXT: b.hi ; CHECK-NOT: cmp ; CHECK: b.hs entry: diff --git a/llvm/test/MC/ARM/data-in-code.ll b/llvm/test/MC/ARM/data-in-code.ll index 10657a3fed3..c2194e9179c 100644 --- a/llvm/test/MC/ARM/data-in-code.ll +++ b/llvm/test/MC/ARM/data-in-code.ll @@ -9,7 +9,7 @@ ;; Ensure that if a jump table is generated that it has Mapping Symbols ;; marking the data-in-code region. -define void @foo(i32* %ptr) nounwind ssp { +define void @foo(i32* %ptr, i32 %b) nounwind ssp { %tmp = load i32, i32* %ptr, align 4 switch i32 %tmp, label %exit [ i32 0, label %bb0 @@ -18,7 +18,7 @@ define void @foo(i32* %ptr) nounwind ssp { i32 3, label %bb3 ] bb0: - store i32 0, i32* %ptr, align 4 + store i32 %b, i32* %ptr, align 4 br label %exit bb1: store i32 1, i32* %ptr, align 4 diff --git a/llvm/test/Transforms/SimplifyCFG/sink-common-code.ll b/llvm/test/Transforms/SimplifyCFG/sink-common-code.ll index 293b7f37c6e..1a4884a6454 100644 --- a/llvm/test/Transforms/SimplifyCFG/sink-common-code.ll +++ b/llvm/test/Transforms/SimplifyCFG/sink-common-code.ll @@ -427,6 +427,116 @@ if.end: ; CHECK: load ; CHECK-NOT: load +define zeroext i1 @test16(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + br label %if.end + +if.else: + br i1 %flag2, label %if.then2, label %if.end + +if.then2: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = zext i1 %cmp2 to i8 + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ 0, %if.else ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +; CHECK-LABEL: test16 +; CHECK: zext +; CHECK-NOT: zext + +define zeroext i1 @test17(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) { +entry: + switch i32 %flag, label %if.end [ + i32 0, label %if.then + i32 1, label %if.then2 + ] + +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + br label %if.end + +if.then2: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = zext i1 %cmp2 to i8 + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ 0, %entry ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +; CHECK-LABEL: test17 +; CHECK: if.then: +; CHECK-NEXT: icmp uge +; CHECK-NEXT: br label %[[x:.*]] + +; CHECK: if.then2: +; CHECK-NEXT: add +; CHECK-NEXT: icmp ule +; CHECK-NEXT: br label %[[x]] + +; CHECK: [[x]]: +; CHECK-NEXT: %[[y:.*]] = phi i1 [ %cmp +; CHECK-NEXT: %[[z:.*]] = zext i1 %[[y]] +; CHECK-NEXT: br label %if.end + +; CHECK: if.end: +; CHECK-NEXT: phi i8 +; CHECK-DAG: [ %[[z]], %[[x]] ] +; CHECK-DAG: [ 0, %entry ] + +define zeroext i1 @test18(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) { +entry: + switch i32 %flag, label %if.then3 [ + i32 0, label %if.then + i32 1, label %if.then2 + ] + +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + br label %if.end + +if.then2: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = zext i1 %cmp2 to i8 + br label %if.end + +if.then3: + %add2 = add i32 %nblks, %blksA + %cmp3 = icmp ule i32 %add2, %blksA + %frombool4 = zext i1 %cmp3 to i8 + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ %frombool4, %if.then3 ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +; CHECK-LABEL: test18 +; CHECK: if.end: +; CHECK-NEXT: %[[x:.*]] = phi i1 +; CHECK-DAG: [ %cmp, %if.then ] +; CHECK-DAG: [ %cmp2, %if.then2 ] +; CHECK-DAG: [ %cmp3, %if.then3 ] +; CHECK-NEXT: zext i1 %[[x]] to i8 + ; CHECK: !0 = !{!1, !1, i64 0} ; CHECK: !1 = !{!"float", !2} ; CHECK: !2 = !{!"an example type tree"} |