diff options
-rw-r--r-- | DIFF | 661 | ||||
-rw-r--r-- | llvm/include/llvm/Analysis/BranchProbabilityInfo.h | 7 | ||||
-rw-r--r-- | llvm/lib/Analysis/BranchProbabilityInfo.cpp | 132 | ||||
-rw-r--r-- | llvm/test/Analysis/BranchProbabilityInfo/basic.ll | 18 | ||||
-rw-r--r-- | llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll | 26 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/pr36292.ll | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/sms-cpy-1.ll | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/block-placement.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/pr37916.ll | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 160 |
10 files changed, 868 insertions, 147 deletions
@@ -0,0 +1,661 @@ +diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +index c8965936fb9..41d6c23b8d0 100644 +--- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h ++++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +@@ -34,6 +34,7 @@ namespace llvm { + class Function; + class LoopInfo; + class raw_ostream; ++class PostDominatorTree; + class TargetLibraryInfo; + class Value; + +@@ -187,8 +188,10 @@ private: + /// Track the set of blocks that always lead to a cold call. + SmallPtrSet<const BasicBlock *, 16> PostDominatedByColdCall; + +- void updatePostDominatedByUnreachable(const BasicBlock *BB); +- void updatePostDominatedByColdCall(const BasicBlock *BB); ++ void computePostDominatedByUnreachable(const Function &F, ++ PostDominatorTree *PDT); ++ void computePostDominatedByColdCall(const Function &F, ++ PostDominatorTree *PDT); + bool calcUnreachableHeuristics(const BasicBlock *BB); + bool calcMetadataWeights(const BasicBlock *BB); + bool calcColdCallHeuristics(const BasicBlock *BB); +diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp +index 7bd237b9ad5..ffba65b5ed5 100644 +--- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp ++++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp +@@ -16,6 +16,7 @@ + #include "llvm/ADT/STLExtras.h" + #include "llvm/ADT/SmallVector.h" + #include "llvm/Analysis/LoopInfo.h" ++#include "llvm/Analysis/PostDominators.h" + #include "llvm/Analysis/TargetLibraryInfo.h" + #include "llvm/IR/Attributes.h" + #include "llvm/IR/BasicBlock.h" +@@ -146,69 +147,83 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; + /// instruction. This is essentially never taken. + static const uint32_t IH_NONTAKEN_WEIGHT = 1; + +-/// Add \p BB to PostDominatedByUnreachable set if applicable. +-void +-BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) { +- const Instruction *TI = BB->getTerminator(); +- if (TI->getNumSuccessors() == 0) { +- if (isa<UnreachableInst>(TI) || +- // If this block is terminated by a call to +- // @llvm.experimental.deoptimize then treat it like an unreachable since +- // the @llvm.experimental.deoptimize call is expected to practically +- // never execute. +- BB->getTerminatingDeoptimizeCall()) +- PostDominatedByUnreachable.insert(BB); +- return; +- } ++static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT, ++ SmallVectorImpl<const BasicBlock *> &WorkList, ++ SmallPtrSetImpl<const BasicBlock *> &TargetSet) { ++ SmallVector<BasicBlock *, 8> Descendants; ++ SmallPtrSet<const BasicBlock *, 16> NewItems; ++ ++ PDT->getDescendants(const_cast<BasicBlock *>(BB), Descendants); ++ for (auto *BB : Descendants) ++ if (TargetSet.insert(BB).second) ++ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) ++ if (!TargetSet.count(*PI)) ++ NewItems.insert(*PI); ++ WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end()); ++} + +- // If the terminator is an InvokeInst, check only the normal destination block +- // as the unwind edge of InvokeInst is also very unlikely taken. +- if (auto *II = dyn_cast<InvokeInst>(TI)) { +- if (PostDominatedByUnreachable.count(II->getNormalDest())) +- PostDominatedByUnreachable.insert(BB); +- return; ++/// Compute a set of basic blocks that are post-dominated by unreachables. ++void BranchProbabilityInfo::computePostDominatedByUnreachable( ++ const Function &F, PostDominatorTree *PDT) { ++ SmallVector<const BasicBlock *, 8> WorkList; ++ for (auto &BB : F) { ++ const Instruction *TI = BB.getTerminator(); ++ if (TI->getNumSuccessors() == 0) { ++ if (isa<UnreachableInst>(TI) || ++ // If this block is terminated by a call to ++ // @llvm.experimental.deoptimize then treat it like an unreachable ++ // since the @llvm.experimental.deoptimize call is expected to ++ // practically never execute. ++ BB.getTerminatingDeoptimizeCall()) ++ UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable); ++ } + } + +- for (auto *I : successors(BB)) +- // If any of successor is not post dominated then BB is also not. +- if (!PostDominatedByUnreachable.count(I)) +- return; +- +- PostDominatedByUnreachable.insert(BB); ++ while (!WorkList.empty()) { ++ const BasicBlock *BB = WorkList.pop_back_val(); ++ if (PostDominatedByUnreachable.count(BB)) ++ continue; ++ // If the terminator is an InvokeInst, check only the normal destination ++ // block as the unwind edge of InvokeInst is also very unlikely taken. ++ if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) { ++ if (PostDominatedByUnreachable.count(II->getNormalDest())) ++ UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); ++ } ++ // If all the successors are unreachable, BB is unreachable as well. ++ else if (!successors(BB).empty() && ++ llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { ++ return PostDominatedByUnreachable.count(Succ); ++ })) ++ UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); ++ } + } + +-/// Add \p BB to PostDominatedByColdCall set if applicable. +-void +-BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) { +- assert(!PostDominatedByColdCall.count(BB)); +- const Instruction *TI = BB->getTerminator(); +- if (TI->getNumSuccessors() == 0) +- return; ++/// compute a set of basic blocks that are post-dominated by ColdCalls. ++void BranchProbabilityInfo::computePostDominatedByColdCall( ++ const Function &F, PostDominatorTree *PDT) { ++ SmallVector<const BasicBlock *, 8> WorkList; ++ for (auto &BB : F) ++ for (auto &I : BB) ++ if (const CallInst *CI = dyn_cast<CallInst>(&I)) ++ if (CI->hasFnAttr(Attribute::Cold)) ++ UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall); + +- // If all of successor are post dominated then BB is also done. +- if (llvm::all_of(successors(BB), [&](const BasicBlock *SuccBB) { +- return PostDominatedByColdCall.count(SuccBB); +- })) { +- PostDominatedByColdCall.insert(BB); +- return; +- } ++ while (!WorkList.empty()) { ++ const BasicBlock *BB = WorkList.pop_back_val(); + +- // If the terminator is an InvokeInst, check only the normal destination +- // block as the unwind edge of InvokeInst is also very unlikely taken. +- if (auto *II = dyn_cast<InvokeInst>(TI)) +- if (PostDominatedByColdCall.count(II->getNormalDest())) { +- PostDominatedByColdCall.insert(BB); +- return; ++ // If the terminator is an InvokeInst, check only the normal destination ++ // block as the unwind edge of InvokeInst is also very unlikely taken. ++ if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) { ++ if (PostDominatedByColdCall.count(II->getNormalDest())) ++ UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); + } +- +- // Otherwise, if the block itself contains a cold function, add it to the +- // set of blocks post-dominated by a cold call. +- for (auto &I : *BB) +- if (const CallInst *CI = dyn_cast<CallInst>(&I)) +- if (CI->hasFnAttr(Attribute::Cold)) { +- PostDominatedByColdCall.insert(BB); +- return; +- } ++ // If all of successor are post dominated then BB is also done. ++ else if (!successors(BB).empty() && ++ llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { ++ return PostDominatedByColdCall.count(Succ); ++ })) ++ UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); ++ } + } + + /// Calculate edge weights for successors lead to unreachable. +@@ -983,13 +998,16 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, + LLVM_DEBUG(dbgs() << "\n"); + } + ++ std::unique_ptr<PostDominatorTree> PDT = ++ std::make_unique<PostDominatorTree>(const_cast<Function &>(F)); ++ computePostDominatedByUnreachable(F, PDT.get()); ++ computePostDominatedByColdCall(F, PDT.get()); ++ + // Walk the basic blocks in post-order so that we can build up state about + // the successors of a block iteratively. + for (auto BB : post_order(&F.getEntryBlock())) { + LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName() + << "\n"); +- updatePostDominatedByUnreachable(BB); +- updatePostDominatedByColdCall(BB); + // If there is no at least two successors, no sense to set probability. + if (BB->getTerminator()->getNumSuccessors() < 2) + continue; +diff --git a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll +index 64e0a82456f..8212cc47690 100644 +--- a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll ++++ b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll +@@ -141,6 +141,24 @@ exit: + ret i32 %result + } + ++define i32 @test_cold_loop(i32 %a, i32 %b) { ++entry: ++ %cond1 = icmp eq i32 %a, 42 ++ br i1 %cond1, label %header, label %exit ++ ++header: ++ br label %body ++ ++body: ++ %cond2 = icmp eq i32 %b, 42 ++ br i1 %cond2, label %header, label %exit ++; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% ++ ++exit: ++ call void @coldfunc() ++ ret i32 %b ++} ++ + declare i32 @regular_function(i32 %i) + + define i32 @test_cold_call_sites_with_prof(i32 %a, i32 %b, i1 %flag, i1 %flag2) { +diff --git a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll +index 0566ca16c2f..6e01afd2cfc 100644 +--- a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll ++++ b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll +@@ -79,6 +79,32 @@ exit: + ret i32 %b + } + ++define i32 @test4(i32 %a, i32 %b) { ++; CHECK: Printing analysis {{.*}} for function 'test4' ++; Make sure we handle loops post-dominated by unreachables. ++entry: ++ %cond1 = icmp eq i32 %a, 42 ++ br i1 %cond1, label %header, label %exit ++; CHECK: edge entry -> header probability is 0x00000001 / 0x80000000 = 0.00% ++; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] ++ ++header: ++ br label %body ++ ++body: ++ %cond2 = icmp eq i32 %a, 42 ++ br i1 %cond2, label %header, label %abort ++; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% ++; CHECK: edge body -> abort probability is 0x40000000 / 0x80000000 = 50.00% ++ ++abort: ++ call void @abort() noreturn ++ unreachable ++ ++exit: ++ ret i32 %b ++} ++ + @_ZTIi = external global i8* + + ; CHECK-LABEL: throwSmallException +diff --git a/llvm/test/CodeGen/PowerPC/pr36292.ll b/llvm/test/CodeGen/PowerPC/pr36292.ll +index 883d26b6690..a859121bb50 100644 +--- a/llvm/test/CodeGen/PowerPC/pr36292.ll ++++ b/llvm/test/CodeGen/PowerPC/pr36292.ll +@@ -15,8 +15,7 @@ define void @test() nounwind comdat { + ; CHECK-NEXT: ld 29, 0(3) + ; CHECK-NEXT: ld 30, 32(1) + ; CHECK-NEXT: cmpld 30, 29 +-; CHECK-NEXT: bge- 0, .LBB0_2 +-; CHECK-NEXT: .p2align 5 ++; CHECK-NEXT: bge 0, .LBB0_2 + ; CHECK-NEXT: .LBB0_1: # %bounds.ok + ; CHECK-NEXT: # + ; CHECK-NEXT: lfsx 2, 0, 3 +@@ -26,7 +25,7 @@ define void @test() nounwind comdat { + ; CHECK-NEXT: addi 30, 30, 1 + ; CHECK-NEXT: stfsx 1, 0, 3 + ; CHECK-NEXT: cmpld 30, 29 +-; CHECK-NEXT: blt+ 0, .LBB0_1 ++; CHECK-NEXT: blt 0, .LBB0_1 + ; CHECK-NEXT: .LBB0_2: # %bounds.fail + ; CHECK-NEXT: std 30, 32(1) + %pos = alloca i64, align 8 +diff --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll +index 8fdcd1eac45..7804b0a3f09 100644 +--- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll ++++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll +@@ -44,7 +44,6 @@ define void @print_res() nounwind { + ; CHECK-NEXT: lbz 5, 0(5) + ; CHECK-NEXT: addi 3, 3, 1 + ; CHECK-NEXT: bdz .LBB0_4 +-; CHECK-NEXT: .p2align 4 + ; CHECK-NEXT: .LBB0_3: # + ; CHECK-NEXT: clrldi 10, 8, 32 + ; CHECK-NEXT: cntlzw 9, 6 +diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll +index acc4b7e1381..258cc2031ae 100644 +--- a/llvm/test/CodeGen/X86/block-placement.ll ++++ b/llvm/test/CodeGen/X86/block-placement.ll +@@ -358,11 +358,11 @@ define void @unnatural_cfg2(i32* %p0, i32 %a0) { + ; CHECK: %loop.header + ; CHECK: %loop.body1 + ; CHECK: %loop.body2 ++; CHECK: %loop.body3 ++; CHECK: %loop.inner1.begin + ; CHECK: %loop.body4 + ; CHECK: %loop.inner2.begin + ; CHECK: %loop.inner2.begin +-; CHECK: %loop.body3 +-; CHECK: %loop.inner1.begin + ; CHECK: %bail + + entry: +diff --git a/llvm/test/CodeGen/X86/pr37916.ll b/llvm/test/CodeGen/X86/pr37916.ll +index 2da9413a9a0..484104da9ff 100644 +--- a/llvm/test/CodeGen/X86/pr37916.ll ++++ b/llvm/test/CodeGen/X86/pr37916.ll +@@ -7,7 +7,6 @@ + define void @fn1() local_unnamed_addr { + ; CHECK-LABEL: fn1: + ; CHECK: # %bb.0: # %entry +-; CHECK-NEXT: .p2align 4, 0x90 + ; CHECK-NEXT: .LBB0_1: # %if.end + ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: movl a+4, %eax +diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +index 9238ab0bf89..92708d33924 100644 +--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll ++++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +@@ -29,8 +29,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { + ; CHECK-NEXT: .cfi_def_cfa_offset 48 + ; CHECK-NEXT: pushq %rbx + ; CHECK-NEXT: .cfi_def_cfa_offset 56 +-; CHECK-NEXT: subq $536, %rsp ## imm = 0x218 +-; CHECK-NEXT: .cfi_def_cfa_offset 592 ++; CHECK-NEXT: subq $552, %rsp ## imm = 0x228 ++; CHECK-NEXT: .cfi_def_cfa_offset 608 + ; CHECK-NEXT: .cfi_offset %rbx, -56 + ; CHECK-NEXT: .cfi_offset %r12, -48 + ; CHECK-NEXT: .cfi_offset %r13, -40 +@@ -54,7 +54,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { + ; CHECK-NEXT: testb %al, %al + ; CHECK-NEXT: je LBB0_55 + ; CHECK-NEXT: LBB0_4: ## %cleanup +-; CHECK-NEXT: addq $536, %rsp ## imm = 0x218 ++; CHECK-NEXT: addq $552, %rsp ## imm = 0x228 + ; CHECK-NEXT: popq %rbx + ; CHECK-NEXT: popq %r12 + ; CHECK-NEXT: popq %r13 +@@ -68,7 +68,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { + ; CHECK-NEXT: je LBB0_55 + ; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720 + ; CHECK-NEXT: movq %rdx, %rbx +-; CHECK-NEXT: movq %rdi, %rbp ++; CHECK-NEXT: movq %rdi, %r14 + ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax + ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx + ; CHECK-NEXT: cmpq %rax, %rcx +@@ -78,10 +78,10 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { + ; CHECK-NEXT: movl $32, %esi + ; CHECK-NEXT: callq _memset + ; CHECK-NEXT: LBB0_8: ## %while.body.preheader +-; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill + ; CHECK-NEXT: imulq $1040, %rbx, %rax ## imm = 0x410 + ; CHECK-NEXT: movq _syBuf@{{.*}}(%rip), %rcx +-; CHECK-NEXT: leaq 8(%rcx,%rax), %rbx ++; CHECK-NEXT: leaq 8(%rcx,%rax), %rax ++; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill + ; CHECK-NEXT: movl $1, %r15d + ; CHECK-NEXT: movq _syCTRO@{{.*}}(%rip), %rax + ; CHECK-NEXT: movb $1, %cl +@@ -92,69 +92,70 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { + ; CHECK-NEXT: testb %cl, %cl + ; CHECK-NEXT: jne LBB0_9 + ; CHECK-NEXT: ## %bb.10: ## %do.end +-; CHECK-NEXT: xorl %r14d, %r14d +-; CHECK-NEXT: testb %r14b, %r14b ++; CHECK-NEXT: xorl %ebp, %ebp ++; CHECK-NEXT: testb %bpl, %bpl + ; CHECK-NEXT: jne LBB0_11 + ; CHECK-NEXT: ## %bb.12: ## %while.body200.preheader +-; CHECK-NEXT: xorl %edx, %edx +-; CHECK-NEXT: leaq {{.*}}(%rip), %rsi +-; CHECK-NEXT: leaq {{.*}}(%rip), %rdi +-; CHECK-NEXT: xorl %ebp, %ebp +-; CHECK-NEXT: xorl %r13d, %r13d ++; CHECK-NEXT: xorl %ebx, %ebx ++; CHECK-NEXT: leaq {{.*}}(%rip), %r13 ++; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill ++; CHECK-NEXT: xorl %r12d, %r12d ++; CHECK-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill + ; CHECK-NEXT: jmp LBB0_13 + ; CHECK-NEXT: .p2align 4, 0x90 + ; CHECK-NEXT: LBB0_20: ## %sw.bb256 + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +-; CHECK-NEXT: movl %r14d, %r13d ++; CHECK-NEXT: movl %ebp, %r12d + ; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 + ; CHECK-NEXT: decl %r15d + ; CHECK-NEXT: testl %r15d, %r15d +-; CHECK-NEXT: movl %r13d, %r14d ++; CHECK-NEXT: movl %r12d, %ebp + ; CHECK-NEXT: jle LBB0_22 + ; CHECK-NEXT: LBB0_13: ## %while.body200 + ; CHECK-NEXT: ## =>This Loop Header: Depth=1 + ; CHECK-NEXT: ## Child Loop BB0_30 Depth 2 + ; CHECK-NEXT: ## Child Loop BB0_38 Depth 2 +-; CHECK-NEXT: leal -268(%r14), %eax ++; CHECK-NEXT: leal -268(%rbp), %eax + ; CHECK-NEXT: cmpl $105, %eax + ; CHECK-NEXT: ja LBB0_14 + ; CHECK-NEXT: ## %bb.56: ## %while.body200 + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +-; CHECK-NEXT: movslq (%rdi,%rax,4), %rax +-; CHECK-NEXT: addq %rdi, %rax ++; CHECK-NEXT: movslq (%r13,%rax,4), %rax ++; CHECK-NEXT: addq %r13, %rax + ; CHECK-NEXT: jmpq *%rax + ; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +-; CHECK-NEXT: testb %dl, %dl +-; CHECK-NEXT: movl %r14d, %r13d ++; CHECK-NEXT: testb %bl, %bl ++; CHECK-NEXT: movl %ebp, %r12d + ; CHECK-NEXT: jne LBB0_21 + ; CHECK-NEXT: jmp LBB0_55 + ; CHECK-NEXT: .p2align 4, 0x90 + ; CHECK-NEXT: LBB0_14: ## %while.body200 + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +-; CHECK-NEXT: leal 1(%r14), %eax ++; CHECK-NEXT: leal 1(%rbp), %eax + ; CHECK-NEXT: cmpl $21, %eax + ; CHECK-NEXT: ja LBB0_20 + ; CHECK-NEXT: ## %bb.15: ## %while.body200 + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +-; CHECK-NEXT: movl $-1, %r13d +-; CHECK-NEXT: movslq (%rsi,%rax,4), %rax +-; CHECK-NEXT: addq %rsi, %rax ++; CHECK-NEXT: movl $-1, %r12d ++; CHECK-NEXT: leaq {{.*}}(%rip), %rcx ++; CHECK-NEXT: movslq (%rcx,%rax,4), %rax ++; CHECK-NEXT: addq %rcx, %rax + ; CHECK-NEXT: jmpq *%rax + ; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +-; CHECK-NEXT: movl $1, %r13d ++; CHECK-NEXT: movl $1, %r12d + ; CHECK-NEXT: jmp LBB0_21 + ; CHECK-NEXT: LBB0_26: ## %sw.bb474 + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +-; CHECK-NEXT: testb %dl, %dl +-; CHECK-NEXT: ## implicit-def: $r12 ++; CHECK-NEXT: testb %bl, %bl ++; CHECK-NEXT: ## implicit-def: $r14 + ; CHECK-NEXT: jne LBB0_34 + ; CHECK-NEXT: ## %bb.27: ## %do.body479.preheader + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +-; CHECK-NEXT: testb %dl, %dl +-; CHECK-NEXT: ## implicit-def: $r12 ++; CHECK-NEXT: testb %bl, %bl ++; CHECK-NEXT: ## implicit-def: $r14 + ; CHECK-NEXT: jne LBB0_34 + ; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +@@ -165,8 +166,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { + ; CHECK-NEXT: .p2align 4, 0x90 + ; CHECK-NEXT: LBB0_32: ## %do.body479.backedge + ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 +-; CHECK-NEXT: leaq 1(%r12), %rax +-; CHECK-NEXT: testb %dl, %dl ++; CHECK-NEXT: leaq 1(%r14), %rax ++; CHECK-NEXT: testb %bl, %bl + ; CHECK-NEXT: je LBB0_33 + ; CHECK-NEXT: ## %bb.29: ## %land.rhs485 + ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 +@@ -175,15 +176,14 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { + ; CHECK-NEXT: LBB0_30: ## %cond.true.i.i2780 + ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 + ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 +-; CHECK-NEXT: movq %rax, %r12 +-; CHECK-NEXT: testb %dl, %dl ++; CHECK-NEXT: movq %rax, %r14 ++; CHECK-NEXT: testb %bl, %bl + ; CHECK-NEXT: jne LBB0_32 + ; CHECK-NEXT: ## %bb.31: ## %lor.rhs500 + ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 + ; CHECK-NEXT: movl $256, %esi ## imm = 0x100 + ; CHECK-NEXT: callq ___maskrune +-; CHECK-NEXT: xorl %edx, %edx +-; CHECK-NEXT: testb %dl, %dl ++; CHECK-NEXT: testb %bl, %bl + ; CHECK-NEXT: jne LBB0_32 + ; CHECK-NEXT: jmp LBB0_34 + ; CHECK-NEXT: LBB0_45: ## %sw.bb1134 +@@ -193,23 +193,23 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { + ; CHECK-NEXT: cmpq %rax, %rcx + ; CHECK-NEXT: jb LBB0_55 + ; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1 +-; CHECK-NEXT: xorl %ebp, %ebp +-; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C ++; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill ++; CHECK-NEXT: movl $268, %r12d ## imm = 0x10C + ; CHECK-NEXT: jmp LBB0_21 +-; CHECK-NEXT: LBB0_19: ## %sw.bb243 ++; CHECK-NEXT: LBB0_40: ## %sw.bb566 + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +-; CHECK-NEXT: movl $2, %r13d ++; CHECK-NEXT: movl $20, %r12d + ; CHECK-NEXT: jmp LBB0_21 +-; CHECK-NEXT: LBB0_40: ## %sw.bb566 ++; CHECK-NEXT: LBB0_19: ## %sw.bb243 + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +-; CHECK-NEXT: movl $20, %r13d ++; CHECK-NEXT: movl $2, %r12d + ; CHECK-NEXT: jmp LBB0_21 + ; CHECK-NEXT: LBB0_33: ## %if.end517.loopexitsplit + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +-; CHECK-NEXT: incq %r12 ++; CHECK-NEXT: incq %r14 + ; CHECK-NEXT: LBB0_34: ## %if.end517 + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +-; CHECK-NEXT: leal -324(%r13), %eax ++; CHECK-NEXT: leal -324(%r12), %eax + ; CHECK-NEXT: cmpl $59, %eax + ; CHECK-NEXT: ja LBB0_35 + ; CHECK-NEXT: ## %bb.57: ## %if.end517 +@@ -219,11 +219,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { + ; CHECK-NEXT: jb LBB0_38 + ; CHECK-NEXT: LBB0_35: ## %if.end517 + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +-; CHECK-NEXT: cmpl $11, %r13d ++; CHECK-NEXT: cmpl $11, %r12d + ; CHECK-NEXT: je LBB0_38 + ; CHECK-NEXT: ## %bb.36: ## %if.end517 + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +-; CHECK-NEXT: cmpl $24, %r13d ++; CHECK-NEXT: cmpl $24, %r12d + ; CHECK-NEXT: je LBB0_38 + ; CHECK-NEXT: ## %bb.37: ## %if.then532 + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +@@ -233,15 +233,14 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { + ; CHECK-NEXT: LBB0_38: ## %for.cond534 + ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 + ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 +-; CHECK-NEXT: testb %dl, %dl ++; CHECK-NEXT: testb %bl, %bl + ; CHECK-NEXT: jne LBB0_38 + ; CHECK-NEXT: ## %bb.39: ## %for.cond542.preheader + ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +-; CHECK-NEXT: testb %dl, %dl +-; CHECK-NEXT: movb $0, (%r12) +-; CHECK-NEXT: movl %r14d, %r13d +-; CHECK-NEXT: leaq {{.*}}(%rip), %rsi +-; CHECK-NEXT: leaq {{.*}}(%rip), %rdi ++; CHECK-NEXT: testb %bl, %bl ++; CHECK-NEXT: movb $0, (%r14) ++; CHECK-NEXT: movl %ebp, %r12d ++; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload + ; CHECK-NEXT: jmp LBB0_21 + ; CHECK-NEXT: .p2align 4, 0x90 + ; CHECK-NEXT: LBB0_42: ## %while.cond864 +@@ -256,30 +255,44 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { + ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: jmp LBB0_25 + ; CHECK-NEXT: LBB0_11: +-; CHECK-NEXT: xorl %ebp, %ebp +-; CHECK-NEXT: xorl %r13d, %r13d ++; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill ++; CHECK-NEXT: xorl %r12d, %r12d + ; CHECK-NEXT: LBB0_22: ## %while.end1465 +-; CHECK-NEXT: incl %r13d +-; CHECK-NEXT: cmpl $16, %r13d ++; CHECK-NEXT: incl %r12d ++; CHECK-NEXT: cmpl $16, %r12d + ; CHECK-NEXT: ja LBB0_50 + ; CHECK-NEXT: ## %bb.23: ## %while.end1465 + ; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801 +-; CHECK-NEXT: btl %r13d, %eax ++; CHECK-NEXT: btl %r12d, %eax + ; CHECK-NEXT: jae LBB0_50 + ; CHECK-NEXT: ## %bb.24: +-; CHECK-NEXT: xorl %ebp, %ebp +-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload ++; CHECK-NEXT: xorl %ebx, %ebx + ; CHECK-NEXT: LBB0_48: ## %if.then1477 + ; CHECK-NEXT: movl $1, %edx + ; CHECK-NEXT: callq _write +-; CHECK-NEXT: subq %rbp, %rbx ++; CHECK-NEXT: subq %rbx, %r14 + ; CHECK-NEXT: movq _syHistory@{{.*}}(%rip), %rax +-; CHECK-NEXT: leaq 8189(%rbx,%rax), %rax ++; CHECK-NEXT: leaq 8189(%r14,%rax), %rax + ; CHECK-NEXT: .p2align 4, 0x90 + ; CHECK-NEXT: LBB0_49: ## %for.body1723 + ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: decq %rax + ; CHECK-NEXT: jmp LBB0_49 ++; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit ++; CHECK-NEXT: movq %r14, %rbx ++; CHECK-NEXT: jmp LBB0_48 ++; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader ++; CHECK-NEXT: xorl %eax, %eax ++; CHECK-NEXT: testb %al, %al ++; CHECK-NEXT: je LBB0_41 ++; CHECK-NEXT: .p2align 4, 0x90 ++; CHECK-NEXT: LBB0_17: ## %for.body643.us ++; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: jmp LBB0_17 ++; CHECK-NEXT: .p2align 4, 0x90 ++; CHECK-NEXT: LBB0_41: ## %while.cond661 ++; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: jmp LBB0_41 + ; CHECK-NEXT: LBB0_50: ## %for.cond1480.preheader + ; CHECK-NEXT: movl $512, %eax ## imm = 0x200 + ; CHECK-NEXT: cmpq %rax, %rax +@@ -289,14 +302,15 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { + ; CHECK-NEXT: testb %al, %al + ; CHECK-NEXT: jne LBB0_54 + ; CHECK-NEXT: ## %bb.52: ## %while.body1679.preheader +-; CHECK-NEXT: incl %ebp +-; CHECK-NEXT: .p2align 4, 0x90 ++; CHECK-NEXT: incl {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill + ; CHECK-NEXT: LBB0_53: ## %while.body1679 + ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +-; CHECK-NEXT: movq (%rbx), %rdi ++; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload ++; CHECK-NEXT: movq (%rax), %rdi + ; CHECK-NEXT: callq _fileno +-; CHECK-NEXT: movslq %ebp, %rax +-; CHECK-NEXT: leal 1(%rax), %ebp ++; CHECK-NEXT: movslq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 4-byte Folded Reload ++; CHECK-NEXT: leal 1(%rax), %ecx ++; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill + ; CHECK-NEXT: cmpq %rax, %rax + ; CHECK-NEXT: jl LBB0_53 + ; CHECK-NEXT: LBB0_54: ## %while.cond1683.preheader +@@ -304,22 +318,6 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { + ; CHECK-NEXT: testb %al, %al + ; CHECK-NEXT: LBB0_55: ## %if.then.i + ; CHECK-NEXT: ud2 +-; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit +-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload +-; CHECK-NEXT: movq %rbx, %rbp +-; CHECK-NEXT: jmp LBB0_48 +-; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader +-; CHECK-NEXT: xorl %eax, %eax +-; CHECK-NEXT: testb %al, %al +-; CHECK-NEXT: je LBB0_41 +-; CHECK-NEXT: .p2align 4, 0x90 +-; CHECK-NEXT: LBB0_17: ## %for.body643.us +-; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +-; CHECK-NEXT: jmp LBB0_17 +-; CHECK-NEXT: .p2align 4, 0x90 +-; CHECK-NEXT: LBB0_41: ## %while.cond661 +-; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +-; CHECK-NEXT: jmp LBB0_41 + entry: + %sub.ptr.rhs.cast646 = ptrtoint i8* %line to i64 + %old = alloca [512 x i8], align 16 diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index c8965936fb9..41d6c23b8d0 100644 --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -34,6 +34,7 @@ namespace llvm { class Function; class LoopInfo; class raw_ostream; +class PostDominatorTree; class TargetLibraryInfo; class Value; @@ -187,8 +188,10 @@ private: /// Track the set of blocks that always lead to a cold call. SmallPtrSet<const BasicBlock *, 16> PostDominatedByColdCall; - void updatePostDominatedByUnreachable(const BasicBlock *BB); - void updatePostDominatedByColdCall(const BasicBlock *BB); + void computePostDominatedByUnreachable(const Function &F, + PostDominatorTree *PDT); + void computePostDominatedByColdCall(const Function &F, + PostDominatorTree *PDT); bool calcUnreachableHeuristics(const BasicBlock *BB); bool calcMetadataWeights(const BasicBlock *BB); bool calcColdCallHeuristics(const BasicBlock *BB); diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 7bd237b9ad5..ffba65b5ed5 100644 --- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -146,69 +147,83 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; /// instruction. This is essentially never taken. static const uint32_t IH_NONTAKEN_WEIGHT = 1; -/// Add \p BB to PostDominatedByUnreachable set if applicable. -void -BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) { - const Instruction *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 0) { - if (isa<UnreachableInst>(TI) || - // If this block is terminated by a call to - // @llvm.experimental.deoptimize then treat it like an unreachable since - // the @llvm.experimental.deoptimize call is expected to practically - // never execute. - BB->getTerminatingDeoptimizeCall()) - PostDominatedByUnreachable.insert(BB); - return; - } +static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT, + SmallVectorImpl<const BasicBlock *> &WorkList, + SmallPtrSetImpl<const BasicBlock *> &TargetSet) { + SmallVector<BasicBlock *, 8> Descendants; + SmallPtrSet<const BasicBlock *, 16> NewItems; + + PDT->getDescendants(const_cast<BasicBlock *>(BB), Descendants); + for (auto *BB : Descendants) + if (TargetSet.insert(BB).second) + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if (!TargetSet.count(*PI)) + NewItems.insert(*PI); + WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end()); +} - // If the terminator is an InvokeInst, check only the normal destination block - // as the unwind edge of InvokeInst is also very unlikely taken. - if (auto *II = dyn_cast<InvokeInst>(TI)) { - if (PostDominatedByUnreachable.count(II->getNormalDest())) - PostDominatedByUnreachable.insert(BB); - return; +/// Compute a set of basic blocks that are post-dominated by unreachables. +void BranchProbabilityInfo::computePostDominatedByUnreachable( + const Function &F, PostDominatorTree *PDT) { + SmallVector<const BasicBlock *, 8> WorkList; + for (auto &BB : F) { + const Instruction *TI = BB.getTerminator(); + if (TI->getNumSuccessors() == 0) { + if (isa<UnreachableInst>(TI) || + // If this block is terminated by a call to + // @llvm.experimental.deoptimize then treat it like an unreachable + // since the @llvm.experimental.deoptimize call is expected to + // practically never execute. + BB.getTerminatingDeoptimizeCall()) + UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable); + } } - for (auto *I : successors(BB)) - // If any of successor is not post dominated then BB is also not. - if (!PostDominatedByUnreachable.count(I)) - return; - - PostDominatedByUnreachable.insert(BB); + while (!WorkList.empty()) { + const BasicBlock *BB = WorkList.pop_back_val(); + if (PostDominatedByUnreachable.count(BB)) + continue; + // If the terminator is an InvokeInst, check only the normal destination + // block as the unwind edge of InvokeInst is also very unlikely taken. + if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + if (PostDominatedByUnreachable.count(II->getNormalDest())) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); + } + // If all the successors are unreachable, BB is unreachable as well. + else if (!successors(BB).empty() && + llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { + return PostDominatedByUnreachable.count(Succ); + })) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); + } } -/// Add \p BB to PostDominatedByColdCall set if applicable. -void -BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) { - assert(!PostDominatedByColdCall.count(BB)); - const Instruction *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 0) - return; +/// compute a set of basic blocks that are post-dominated by ColdCalls. +void BranchProbabilityInfo::computePostDominatedByColdCall( + const Function &F, PostDominatorTree *PDT) { + SmallVector<const BasicBlock *, 8> WorkList; + for (auto &BB : F) + for (auto &I : BB) + if (const CallInst *CI = dyn_cast<CallInst>(&I)) + if (CI->hasFnAttr(Attribute::Cold)) + UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall); - // If all of successor are post dominated then BB is also done. - if (llvm::all_of(successors(BB), [&](const BasicBlock *SuccBB) { - return PostDominatedByColdCall.count(SuccBB); - })) { - PostDominatedByColdCall.insert(BB); - return; - } + while (!WorkList.empty()) { + const BasicBlock *BB = WorkList.pop_back_val(); - // If the terminator is an InvokeInst, check only the normal destination - // block as the unwind edge of InvokeInst is also very unlikely taken. - if (auto *II = dyn_cast<InvokeInst>(TI)) - if (PostDominatedByColdCall.count(II->getNormalDest())) { - PostDominatedByColdCall.insert(BB); - return; + // If the terminator is an InvokeInst, check only the normal destination + // block as the unwind edge of InvokeInst is also very unlikely taken. + if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + if (PostDominatedByColdCall.count(II->getNormalDest())) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); } - - // Otherwise, if the block itself contains a cold function, add it to the - // set of blocks post-dominated by a cold call. - for (auto &I : *BB) - if (const CallInst *CI = dyn_cast<CallInst>(&I)) - if (CI->hasFnAttr(Attribute::Cold)) { - PostDominatedByColdCall.insert(BB); - return; - } + // If all of successor are post dominated then BB is also done. + else if (!successors(BB).empty() && + llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { + return PostDominatedByColdCall.count(Succ); + })) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); + } } /// Calculate edge weights for successors lead to unreachable. @@ -983,13 +998,16 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, LLVM_DEBUG(dbgs() << "\n"); } + std::unique_ptr<PostDominatorTree> PDT = + std::make_unique<PostDominatorTree>(const_cast<Function &>(F)); + computePostDominatedByUnreachable(F, PDT.get()); + computePostDominatedByColdCall(F, PDT.get()); + // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. for (auto BB : post_order(&F.getEntryBlock())) { LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); - updatePostDominatedByUnreachable(BB); - updatePostDominatedByColdCall(BB); // If there is no at least two successors, no sense to set probability. if (BB->getTerminator()->getNumSuccessors() < 2) continue; diff --git a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll index 64e0a82456f..8212cc47690 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll @@ -141,6 +141,24 @@ exit: ret i32 %result } +define i32 @test_cold_loop(i32 %a, i32 %b) { +entry: + %cond1 = icmp eq i32 %a, 42 + br i1 %cond1, label %header, label %exit + +header: + br label %body + +body: + %cond2 = icmp eq i32 %b, 42 + br i1 %cond2, label %header, label %exit +; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% + +exit: + call void @coldfunc() + ret i32 %b +} + declare i32 @regular_function(i32 %i) define i32 @test_cold_call_sites_with_prof(i32 %a, i32 %b, i1 %flag, i1 %flag2) { diff --git a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll index 0566ca16c2f..6e01afd2cfc 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll @@ -79,6 +79,32 @@ exit: ret i32 %b } +define i32 @test4(i32 %a, i32 %b) { +; CHECK: Printing analysis {{.*}} for function 'test4' +; Make sure we handle loops post-dominated by unreachables. +entry: + %cond1 = icmp eq i32 %a, 42 + br i1 %cond1, label %header, label %exit +; CHECK: edge entry -> header probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] + +header: + br label %body + +body: + %cond2 = icmp eq i32 %a, 42 + br i1 %cond2, label %header, label %abort +; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge body -> abort probability is 0x40000000 / 0x80000000 = 50.00% + +abort: + call void @abort() noreturn + unreachable + +exit: + ret i32 %b +} + @_ZTIi = external global i8* ; CHECK-LABEL: throwSmallException diff --git a/llvm/test/CodeGen/PowerPC/pr36292.ll b/llvm/test/CodeGen/PowerPC/pr36292.ll index 883d26b6690..a859121bb50 100644 --- a/llvm/test/CodeGen/PowerPC/pr36292.ll +++ b/llvm/test/CodeGen/PowerPC/pr36292.ll @@ -15,8 +15,7 @@ define void @test() nounwind comdat { ; CHECK-NEXT: ld 29, 0(3) ; CHECK-NEXT: ld 30, 32(1) ; CHECK-NEXT: cmpld 30, 29 -; CHECK-NEXT: bge- 0, .LBB0_2 -; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: bge 0, .LBB0_2 ; CHECK-NEXT: .LBB0_1: # %bounds.ok ; CHECK-NEXT: # ; CHECK-NEXT: lfsx 2, 0, 3 @@ -26,7 +25,7 @@ define void @test() nounwind comdat { ; CHECK-NEXT: addi 30, 30, 1 ; CHECK-NEXT: stfsx 1, 0, 3 ; CHECK-NEXT: cmpld 30, 29 -; CHECK-NEXT: blt+ 0, .LBB0_1 +; CHECK-NEXT: blt 0, .LBB0_1 ; CHECK-NEXT: .LBB0_2: # %bounds.fail ; CHECK-NEXT: std 30, 32(1) %pos = alloca i64, align 8 diff --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll index 8fdcd1eac45..7804b0a3f09 100644 --- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll +++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll @@ -44,7 +44,6 @@ define void @print_res() nounwind { ; CHECK-NEXT: lbz 5, 0(5) ; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: bdz .LBB0_4 -; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_3: # ; CHECK-NEXT: clrldi 10, 8, 32 ; CHECK-NEXT: cntlzw 9, 6 diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll index acc4b7e1381..258cc2031ae 100644 --- a/llvm/test/CodeGen/X86/block-placement.ll +++ b/llvm/test/CodeGen/X86/block-placement.ll @@ -358,11 +358,11 @@ define void @unnatural_cfg2(i32* %p0, i32 %a0) { ; CHECK: %loop.header ; CHECK: %loop.body1 ; CHECK: %loop.body2 +; CHECK: %loop.body3 +; CHECK: %loop.inner1.begin ; CHECK: %loop.body4 ; CHECK: %loop.inner2.begin ; CHECK: %loop.inner2.begin -; CHECK: %loop.body3 -; CHECK: %loop.inner1.begin ; CHECK: %bail entry: diff --git a/llvm/test/CodeGen/X86/pr37916.ll b/llvm/test/CodeGen/X86/pr37916.ll index 2da9413a9a0..484104da9ff 100644 --- a/llvm/test/CodeGen/X86/pr37916.ll +++ b/llvm/test/CodeGen/X86/pr37916.ll @@ -7,7 +7,6 @@ define void @fn1() local_unnamed_addr { ; CHECK-LABEL: fn1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_1: # %if.end ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movl a+4, %eax diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll index 9238ab0bf89..92708d33924 100644 --- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -29,8 +29,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 56 -; CHECK-NEXT: subq $536, %rsp ## imm = 0x218 -; CHECK-NEXT: .cfi_def_cfa_offset 592 +; CHECK-NEXT: subq $552, %rsp ## imm = 0x228 +; CHECK-NEXT: .cfi_def_cfa_offset 608 ; CHECK-NEXT: .cfi_offset %rbx, -56 ; CHECK-NEXT: .cfi_offset %r12, -48 ; CHECK-NEXT: .cfi_offset %r13, -40 @@ -54,7 +54,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je LBB0_55 ; CHECK-NEXT: LBB0_4: ## %cleanup -; CHECK-NEXT: addq $536, %rsp ## imm = 0x218 +; CHECK-NEXT: addq $552, %rsp ## imm = 0x228 ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 ; CHECK-NEXT: popq %r13 @@ -68,7 +68,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: je LBB0_55 ; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720 ; CHECK-NEXT: movq %rdx, %rbx -; CHECK-NEXT: movq %rdi, %rbp +; CHECK-NEXT: movq %rdi, %r14 ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: cmpq %rax, %rcx @@ -78,10 +78,10 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: movl $32, %esi ; CHECK-NEXT: callq _memset ; CHECK-NEXT: LBB0_8: ## %while.body.preheader -; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: imulq $1040, %rbx, %rax ## imm = 0x410 ; CHECK-NEXT: movq _syBuf@{{.*}}(%rip), %rcx -; CHECK-NEXT: leaq 8(%rcx,%rax), %rbx +; CHECK-NEXT: leaq 8(%rcx,%rax), %rax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: movl $1, %r15d ; CHECK-NEXT: movq _syCTRO@{{.*}}(%rip), %rax ; CHECK-NEXT: movb $1, %cl @@ -92,69 +92,70 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: jne LBB0_9 ; CHECK-NEXT: ## %bb.10: ## %do.end -; CHECK-NEXT: xorl %r14d, %r14d -; CHECK-NEXT: testb %r14b, %r14b +; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: testb %bpl, %bpl ; CHECK-NEXT: jne LBB0_11 ; CHECK-NEXT: ## %bb.12: ## %while.body200.preheader -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: leaq {{.*}}(%rip), %rsi -; CHECK-NEXT: leaq {{.*}}(%rip), %rdi -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: xorl %r13d, %r13d +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: leaq {{.*}}(%rip), %r13 +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: xorl %r12d, %r12d +; CHECK-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: jmp LBB0_13 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_20: ## %sw.bb256 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl %r14d, %r13d +; CHECK-NEXT: movl %ebp, %r12d ; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: decl %r15d ; CHECK-NEXT: testl %r15d, %r15d -; CHECK-NEXT: movl %r13d, %r14d +; CHECK-NEXT: movl %r12d, %ebp ; CHECK-NEXT: jle LBB0_22 ; CHECK-NEXT: LBB0_13: ## %while.body200 ; CHECK-NEXT: ## =>This Loop Header: Depth=1 ; CHECK-NEXT: ## Child Loop BB0_30 Depth 2 ; CHECK-NEXT: ## Child Loop BB0_38 Depth 2 -; CHECK-NEXT: leal -268(%r14), %eax +; CHECK-NEXT: leal -268(%rbp), %eax ; CHECK-NEXT: cmpl $105, %eax ; CHECK-NEXT: ja LBB0_14 ; CHECK-NEXT: ## %bb.56: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movslq (%rdi,%rax,4), %rax -; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: movslq (%r13,%rax,4), %rax +; CHECK-NEXT: addq %r13, %rax ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: movl %r14d, %r13d +; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movl %ebp, %r12d ; CHECK-NEXT: jne LBB0_21 ; CHECK-NEXT: jmp LBB0_55 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_14: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal 1(%r14), %eax +; CHECK-NEXT: leal 1(%rbp), %eax ; CHECK-NEXT: cmpl $21, %eax ; CHECK-NEXT: ja LBB0_20 ; CHECK-NEXT: ## %bb.15: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $-1, %r13d -; CHECK-NEXT: movslq (%rsi,%rax,4), %rax -; CHECK-NEXT: addq %rsi, %rax +; CHECK-NEXT: movl $-1, %r12d +; CHECK-NEXT: leaq {{.*}}(%rip), %rcx +; CHECK-NEXT: movslq (%rcx,%rax,4), %rax +; CHECK-NEXT: addq %rcx, %rax ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $1, %r13d +; CHECK-NEXT: movl $1, %r12d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_26: ## %sw.bb474 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: ## implicit-def: $r12 +; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: ## implicit-def: $r14 ; CHECK-NEXT: jne LBB0_34 ; CHECK-NEXT: ## %bb.27: ## %do.body479.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: ## implicit-def: $r12 +; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: ## implicit-def: $r14 ; CHECK-NEXT: jne LBB0_34 ; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -165,8 +166,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_32: ## %do.body479.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 -; CHECK-NEXT: leaq 1(%r12), %rax -; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: leaq 1(%r14), %rax +; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: je LBB0_33 ; CHECK-NEXT: ## %bb.29: ## %land.rhs485 ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 @@ -175,15 +176,14 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: LBB0_30: ## %cond.true.i.i2780 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 -; CHECK-NEXT: movq %rax, %r12 -; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: movq %rax, %r14 +; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: ## %bb.31: ## %lor.rhs500 ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 ; CHECK-NEXT: movl $256, %esi ## imm = 0x100 ; CHECK-NEXT: callq ___maskrune -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: jmp LBB0_34 ; CHECK-NEXT: LBB0_45: ## %sw.bb1134 @@ -193,23 +193,23 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: cmpq %rax, %rcx ; CHECK-NEXT: jb LBB0_55 ; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: movl $268, %r12d ## imm = 0x10C ; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_19: ## %sw.bb243 +; CHECK-NEXT: LBB0_40: ## %sw.bb566 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $2, %r13d +; CHECK-NEXT: movl $20, %r12d ; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_40: ## %sw.bb566 +; CHECK-NEXT: LBB0_19: ## %sw.bb243 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $20, %r13d +; CHECK-NEXT: movl $2, %r12d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_33: ## %if.end517.loopexitsplit ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: incq %r12 +; CHECK-NEXT: incq %r14 ; CHECK-NEXT: LBB0_34: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal -324(%r13), %eax +; CHECK-NEXT: leal -324(%r12), %eax ; CHECK-NEXT: cmpl $59, %eax ; CHECK-NEXT: ja LBB0_35 ; CHECK-NEXT: ## %bb.57: ## %if.end517 @@ -219,11 +219,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: jb LBB0_38 ; CHECK-NEXT: LBB0_35: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $11, %r13d +; CHECK-NEXT: cmpl $11, %r12d ; CHECK-NEXT: je LBB0_38 ; CHECK-NEXT: ## %bb.36: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $24, %r13d +; CHECK-NEXT: cmpl $24, %r12d ; CHECK-NEXT: je LBB0_38 ; CHECK-NEXT: ## %bb.37: ## %if.then532 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -233,15 +233,14 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: LBB0_38: ## %for.cond534 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 -; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne LBB0_38 ; CHECK-NEXT: ## %bb.39: ## %for.cond542.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: movb $0, (%r12) -; CHECK-NEXT: movl %r14d, %r13d -; CHECK-NEXT: leaq {{.*}}(%rip), %rsi -; CHECK-NEXT: leaq {{.*}}(%rip), %rdi +; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movb $0, (%r14) +; CHECK-NEXT: movl %ebp, %r12d +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_42: ## %while.cond864 @@ -256,30 +255,44 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: jmp LBB0_25 ; CHECK-NEXT: LBB0_11: -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: xorl %r13d, %r13d +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: xorl %r12d, %r12d ; CHECK-NEXT: LBB0_22: ## %while.end1465 -; CHECK-NEXT: incl %r13d -; CHECK-NEXT: cmpl $16, %r13d +; CHECK-NEXT: incl %r12d +; CHECK-NEXT: cmpl $16, %r12d ; CHECK-NEXT: ja LBB0_50 ; CHECK-NEXT: ## %bb.23: ## %while.end1465 ; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801 -; CHECK-NEXT: btl %r13d, %eax +; CHECK-NEXT: btl %r12d, %eax ; CHECK-NEXT: jae LBB0_50 ; CHECK-NEXT: ## %bb.24: -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload +; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: LBB0_48: ## %if.then1477 ; CHECK-NEXT: movl $1, %edx ; CHECK-NEXT: callq _write -; CHECK-NEXT: subq %rbp, %rbx +; CHECK-NEXT: subq %rbx, %r14 ; CHECK-NEXT: movq _syHistory@{{.*}}(%rip), %rax -; CHECK-NEXT: leaq 8189(%rbx,%rax), %rax +; CHECK-NEXT: leaq 8189(%r14,%rax), %rax ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_49: ## %for.body1723 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: decq %rax ; CHECK-NEXT: jmp LBB0_49 +; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit +; CHECK-NEXT: movq %r14, %rbx +; CHECK-NEXT: jmp LBB0_48 +; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je LBB0_41 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_17: ## %for.body643.us +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp LBB0_17 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_41: ## %while.cond661 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp LBB0_41 ; CHECK-NEXT: LBB0_50: ## %for.cond1480.preheader ; CHECK-NEXT: movl $512, %eax ## imm = 0x200 ; CHECK-NEXT: cmpq %rax, %rax @@ -289,14 +302,15 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne LBB0_54 ; CHECK-NEXT: ## %bb.52: ## %while.body1679.preheader -; CHECK-NEXT: incl %ebp -; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: incl {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill ; CHECK-NEXT: LBB0_53: ## %while.body1679 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movq (%rbx), %rdi +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload +; CHECK-NEXT: movq (%rax), %rdi ; CHECK-NEXT: callq _fileno -; CHECK-NEXT: movslq %ebp, %rax -; CHECK-NEXT: leal 1(%rax), %ebp +; CHECK-NEXT: movslq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 4-byte Folded Reload +; CHECK-NEXT: leal 1(%rax), %ecx +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: cmpq %rax, %rax ; CHECK-NEXT: jl LBB0_53 ; CHECK-NEXT: LBB0_54: ## %while.cond1683.preheader @@ -304,22 +318,6 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: LBB0_55: ## %if.then.i ; CHECK-NEXT: ud2 -; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload -; CHECK-NEXT: movq %rbx, %rbp -; CHECK-NEXT: jmp LBB0_48 -; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je LBB0_41 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_17: ## %for.body643.us -; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jmp LBB0_17 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_41: ## %while.cond661 -; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jmp LBB0_41 entry: %sub.ptr.rhs.cast646 = ptrtoint i8* %line to i64 %old = alloca [512 x i8], align 16 |