diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/include/llvm/Target/TargetInstrInfo.h | 5 | ||||
-rw-r--r-- | llvm/lib/CodeGen/TailDuplication.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 93 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.h | 96 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/block-placement.ll | 38 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/fast-isel-cmp-branch3.ll | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/fp-une-cmp.ll | 38 |
8 files changed, 179 insertions, 107 deletions
diff --git a/llvm/include/llvm/Target/TargetInstrInfo.h b/llvm/include/llvm/Target/TargetInstrInfo.h index 28ca307ec9a..05ba3ba5025 100644 --- a/llvm/include/llvm/Target/TargetInstrInfo.h +++ b/llvm/include/llvm/Target/TargetInstrInfo.h @@ -452,6 +452,8 @@ public: /// If AllowModify is true, then this routine is allowed to modify the basic /// block (e.g. delete instructions after the unconditional branch). /// + /// The CFG information in MBB.Predecessors and MBB.Successors must be valid + /// before calling this function. virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, @@ -521,6 +523,9 @@ public: /// cases where AnalyzeBranch doesn't apply because there was no original /// branch to analyze. At least this much must be implemented, else tail /// merging needs to be disabled. + /// + /// The CFG information in MBB.Predecessors and MBB.Successors must be valid + /// before calling this function. virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp index da593ce3717..2a8431c2bab 100644 --- a/llvm/lib/CodeGen/TailDuplication.cpp +++ b/llvm/lib/CodeGen/TailDuplication.cpp @@ -749,9 +749,6 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, TII->RemoveBranch(*PredBB); - if (PredTBB) - TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); - if (!PredBB->isSuccessor(NewTarget)) PredBB->replaceSuccessor(TailBB, NewTarget); else { @@ -759,6 +756,9 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, assert(PredBB->succ_size() <= 1); } + if (PredTBB) + TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); + TDBBs.push_back(PredBB); } return Changed; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 
dbfa8aca8ac..d61ae4d7bab 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3807,6 +3807,8 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { case X86::COND_NP: return X86::COND_P; case X86::COND_O: return X86::COND_NO; case X86::COND_NO: return X86::COND_O; + case X86::COND_NE_OR_P: return X86::COND_E_AND_NP; + case X86::COND_E_AND_NP: return X86::COND_NE_OR_P; } } @@ -3914,6 +3916,23 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { return !isPredicated(MI); } +// Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may not +// be a fallthrough MBB now due to layout changes). Return nullptr if the +// fallthrough MBB cannot be identified. +static MachineBasicBlock *getFallThroughMBB(MachineBasicBlock *MBB, + MachineBasicBlock *TBB) { + MachineBasicBlock *FallthroughBB = nullptr; + for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { + if ((*SI)->isEHPad() || *SI == TBB) + continue; + // Return a nullptr if we found more than one fallthrough successor. + if (FallthroughBB) + return nullptr; + FallthroughBB = *SI; + } + return FallthroughBB; +} + bool X86InstrInfo::AnalyzeBranchImpl( MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, @@ -4026,30 +4045,45 @@ bool X86InstrInfo::AnalyzeBranchImpl( assert(Cond.size() == 1); assert(TBB); - // Only handle the case where all conditional branches branch to the same - // destination. - if (TBB != I->getOperand(0).getMBB()) - return true; - // If the conditions are the same, we can leave them alone. X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm(); - if (OldBranchCode == BranchCode) + auto NewTBB = I->getOperand(0).getMBB(); + if (OldBranchCode == BranchCode && TBB == NewTBB) continue; // If they differ, see if they fit one of the known patterns. 
Theoretically, // we could handle more patterns here, but we shouldn't expect to see them // if instruction selection has done a reasonable job. - if ((OldBranchCode == X86::COND_NP && - BranchCode == X86::COND_E) || - (OldBranchCode == X86::COND_E && - BranchCode == X86::COND_NP)) - BranchCode = X86::COND_NP_OR_E; - else if ((OldBranchCode == X86::COND_P && - BranchCode == X86::COND_NE) || - (OldBranchCode == X86::COND_NE && - BranchCode == X86::COND_P)) + if (TBB == NewTBB && + ((OldBranchCode == X86::COND_P && BranchCode == X86::COND_NE) || + (OldBranchCode == X86::COND_NE && BranchCode == X86::COND_P))) { BranchCode = X86::COND_NE_OR_P; - else + } else if ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_NE) || + (OldBranchCode == X86::COND_E && BranchCode == X86::COND_P)) { + if (NewTBB != (FBB ? FBB : getFallThroughMBB(&MBB, TBB))) + return true; + + // X86::COND_E_AND_NP usually has two different branch destinations. + // + // JP B1 + // JE B2 + // JMP B1 + // B1: + // B2: + // + // Here this condition branches to B2 only if NP && E. It has another + // equivalent form: + // + // JNE B1 + // JNP B2 + // JMP B1 + // B1: + // B2: + // + // Similarly it branches to B2 only if E && NP. That is why this condition + // is named with COND_E_AND_NP. + BranchCode = X86::COND_E_AND_NP; + } else return true; // Update the MachineOperand. @@ -4174,17 +4208,13 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, return 1; } + // If FBB is null, it is implied to be a fall-through block. + bool FallThru = FBB == nullptr; + // Conditional branch. unsigned Count = 0; X86::CondCode CC = (X86::CondCode)Cond[0].getImm(); switch (CC) { - case X86::COND_NP_OR_E: - // Synthesize NP_OR_E with two branches. - BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(TBB); - ++Count; - BuildMI(&MBB, DL, get(X86::JE_1)).addMBB(TBB); - ++Count; - break; case X86::COND_NE_OR_P: // Synthesize NE_OR_P with two branches. 
BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(TBB); @@ -4192,13 +4222,26 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, BuildMI(&MBB, DL, get(X86::JP_1)).addMBB(TBB); ++Count; break; + case X86::COND_E_AND_NP: + // Use the next block of MBB as FBB if it is null. + if (FBB == nullptr) { + FBB = getFallThroughMBB(&MBB, TBB); + assert(FBB && "MBB cannot be the last block in function when the false " + "body is a fall-through."); + } + // Synthesize COND_E_AND_NP with two branches. + BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(FBB); + ++Count; + BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(TBB); + ++Count; + break; default: { unsigned Opc = GetCondBranchFromCond(CC); BuildMI(&MBB, DL, get(Opc)).addMBB(TBB); ++Count; } } - if (FBB) { + if (!FallThru) { // Two-way Conditional branch. Insert the second branch. BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(FBB); ++Count; @@ -6759,8 +6802,6 @@ bool X86InstrInfo:: ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert(Cond.size() == 1 && "Invalid X86 branch condition!"); X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm()); - if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E) - return true; Cond[0].setImm(GetOppositeBranchCondition(CC)); return false; } diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 3e3f2af7641..7439fa2f740 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -29,54 +29,54 @@ namespace llvm { namespace X86 { // X86 specific condition code. These correspond to X86_*_COND in // X86InstrInfo.td. They must be kept in synch. - enum CondCode { - COND_A = 0, - COND_AE = 1, - COND_B = 2, - COND_BE = 3, - COND_E = 4, - COND_G = 5, - COND_GE = 6, - COND_L = 7, - COND_LE = 8, - COND_NE = 9, - COND_NO = 10, - COND_NP = 11, - COND_NS = 12, - COND_O = 13, - COND_P = 14, - COND_S = 15, - LAST_VALID_COND = COND_S, - - // Artificial condition codes. 
These are used by AnalyzeBranch - // to indicate a block terminated with two conditional branches to - // the same location. This occurs in code using FCMP_OEQ or FCMP_UNE, - // which can't be represented on x86 with a single condition. These - // are never used in MachineInstrs. - COND_NE_OR_P, - COND_NP_OR_E, - - COND_INVALID - }; - - // Turn condition code into conditional branch opcode. - unsigned GetCondBranchFromCond(CondCode CC); - - /// \brief Return a set opcode for the given condition and whether it has - /// a memory operand. - unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false); - - /// \brief Return a cmov opcode for the given condition, register size in - /// bytes, and operand type. - unsigned getCMovFromCond(CondCode CC, unsigned RegBytes, - bool HasMemoryOperand = false); - - // Turn CMov opcode into condition code. - CondCode getCondFromCMovOpc(unsigned Opc); - - /// GetOppositeBranchCondition - Return the inverse of the specified cond, - /// e.g. turning COND_E to COND_NE. - CondCode GetOppositeBranchCondition(CondCode CC); +enum CondCode { + COND_A = 0, + COND_AE = 1, + COND_B = 2, + COND_BE = 3, + COND_E = 4, + COND_G = 5, + COND_GE = 6, + COND_L = 7, + COND_LE = 8, + COND_NE = 9, + COND_NO = 10, + COND_NP = 11, + COND_NS = 12, + COND_O = 13, + COND_P = 14, + COND_S = 15, + LAST_VALID_COND = COND_S, + + // Artificial condition codes. These are used by AnalyzeBranch + // to indicate a block terminated with two conditional branches that together + // form a compound condition. They occur in code using FCMP_OEQ or FCMP_UNE, + // which can't be represented on x86 with a single condition. These + // are never used in MachineInstrs and are inverses of one another. + COND_NE_OR_P, + COND_E_AND_NP, + + COND_INVALID +}; + +// Turn condition code into conditional branch opcode. +unsigned GetCondBranchFromCond(CondCode CC); + +/// \brief Return a set opcode for the given condition and whether it has +/// a memory operand. 
+unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false); + +/// \brief Return a cmov opcode for the given condition, register size in +/// bytes, and operand type. +unsigned getCMovFromCond(CondCode CC, unsigned RegBytes, + bool HasMemoryOperand = false); + +// Turn CMov opcode into condition code. +CondCode getCondFromCMovOpc(unsigned Opc); + +/// GetOppositeBranchCondition - Return the inverse of the specified cond, +/// e.g. turning COND_E to COND_NE. +CondCode GetOppositeBranchCondition(CondCode CC); } // end namespace X86; diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll index 98d37153876..fd389b5f145 100644 --- a/llvm/test/CodeGen/X86/block-placement.ll +++ b/llvm/test/CodeGen/X86/block-placement.ll @@ -463,26 +463,24 @@ exit: } define void @fpcmp_unanalyzable_branch(i1 %cond) { -; This function's CFG contains an unanalyzable branch that is likely to be -; split due to having a different high-probability predecessor. -; CHECK: fpcmp_unanalyzable_branch -; CHECK: %entry -; CHECK: %exit -; CHECK-NOT: %if.then -; CHECK-NOT: %if.end -; CHECK-NOT: jne -; CHECK-NOT: jnp -; CHECK: jne -; CHECK-NEXT: jnp -; CHECK-NEXT: %if.then +; This function's CFG contains a once-unanalyzable branch (une on floating +; points). Now that it is analyzable, we should get the best layout in which each +; edge in 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end' is +; fall-through. +; CHECK-LABEL: fpcmp_unanalyzable_branch: +; CHECK: # BB#0: # %entry +; CHECK: # BB#1: # %entry.if.then_crit_edge +; CHECK: .LBB10_4: # %if.then +; CHECK: .LBB10_5: # %if.end +; CHECK: # BB#3: # %exit +; CHECK: jne .LBB10_4 +; CHECK-NEXT: jnp .LBB10_5 +; CHECK-NEXT: jmp .LBB10_4 entry: ; Note that this branch must be strongly biased toward ; 'entry.if.then_crit_edge' to ensure that we would try to form a chain for -; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then'. 
It is the last edge in that -; chain which would violate the unanalyzable branch in 'exit', but we won't even -; try this trick unless 'if.then' is believed to almost always be reached from -; 'entry.if.then_crit_edge'. +; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end'. br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1 entry.if.then_crit_edge: @@ -494,7 +492,7 @@ lor.lhs.false: exit: %cmp.i = fcmp une double 0.000000e+00, undef - br i1 %cmp.i, label %if.then, label %if.end + br i1 %cmp.i, label %if.then, label %if.end, !prof !3 if.then: %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ] @@ -507,6 +505,7 @@ if.end: } !1 = !{!"branch_weights", i32 1000, i32 1} +!3 = !{!"branch_weights", i32 1, i32 1000} declare i32 @f() declare i32 @g() @@ -665,11 +664,14 @@ define void @unanalyzable_branch_to_best_succ(i1 %cond) { ; Ensure that we can handle unanalyzable branches where the destination block ; gets selected as the optimal successor to merge. ; +; This branch is now analyzable and hence the destination block becomes the +; hotter one. The right order is entry->bar->exit->foo. +; ; CHECK: unanalyzable_branch_to_best_succ ; CHECK: %entry -; CHECK: %foo ; CHECK: %bar ; CHECK: %exit +; CHECK: %foo entry: ; Bias this branch toward bar to ensure we form that chain. 
diff --git a/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll b/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll index 04dbac07690..475d8fcf7f3 100644 --- a/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll +++ b/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll @@ -5,7 +5,7 @@ define i32 @fcmp_oeq(float %x, float %y) { ; CHECK-LABEL: fcmp_oeq ; CHECK: ucomiss %xmm1, %xmm0 ; CHECK-NEXT: jne {{LBB.+_1}} -; CHECK-NEXT: jnp {{LBB.+_2}} +; CHECK-NEXT: jp {{LBB.+_1}} %1 = fcmp oeq float %x, %y br i1 %1, label %bb1, label %bb2 bb2: @@ -162,8 +162,7 @@ define i32 @fcmp_une(float %x, float %y) { ; CHECK-LABEL: fcmp_une ; CHECK: ucomiss %xmm1, %xmm0 ; CHECK-NEXT: jne {{LBB.+_2}} -; CHECK-NEXT: jp {{LBB.+_2}} -; CHECK-NEXT: jmp {{LBB.+_1}} +; CHECK-NEXT: jnp {{LBB.+_1}} %1 = fcmp une float %x, %y br i1 %1, label %bb1, label %bb2 bb2: diff --git a/llvm/test/CodeGen/X86/fast-isel-cmp-branch3.ll b/llvm/test/CodeGen/X86/fast-isel-cmp-branch3.ll index e54d0ca4007..8f09b2e3835 100644 --- a/llvm/test/CodeGen/X86/fast-isel-cmp-branch3.ll +++ b/llvm/test/CodeGen/X86/fast-isel-cmp-branch3.ll @@ -17,7 +17,7 @@ define i32 @fcmp_oeq2(float %x) { ; CHECK: xorps %xmm1, %xmm1 ; CHECK-NEXT: ucomiss %xmm1, %xmm0 ; CHECK-NEXT: jne {{LBB.+_1}} -; CHECK-NEXT: jnp {{LBB.+_2}} +; CHECK-NEXT: jp {{LBB.+_1}} %1 = fcmp oeq float %x, 0.000000e+00 br i1 %1, label %bb1, label %bb2 bb2: @@ -338,8 +338,7 @@ define i32 @fcmp_une2(float %x) { ; CHECK: xorps %xmm1, %xmm1 ; CHECK-NEXT: ucomiss %xmm1, %xmm0 ; CHECK-NEXT: jne {{LBB.+_2}} -; CHECK-NEXT: jp {{LBB.+_2}} -; CHECK-NEXT: jmp {{LBB.+_1}} +; CHECK-NEXT: jnp {{LBB.+_1}} %1 = fcmp une float %x, 0.000000e+00 br i1 %1, label %bb1, label %bb2 bb2: diff --git a/llvm/test/CodeGen/X86/fp-une-cmp.ll b/llvm/test/CodeGen/X86/fp-une-cmp.ll index 9fab5c4dc83..179fffca379 100644 --- a/llvm/test/CodeGen/X86/fp-une-cmp.ll +++ b/llvm/test/CodeGen/X86/fp-une-cmp.ll @@ -48,8 +48,6 @@ bb2: ret double %phi } -; FIXME: With branch weights indicated, bb2 should be placed ahead of bb1. 
- define double @profile_metadata(double %x, double %y) { ; CHECK-LABEL: profile_metadata: ; CHECK: # BB#0: # %entry @@ -57,11 +55,12 @@ define double @profile_metadata(double %x, double %y) { ; CHECK-NEXT: xorpd %xmm1, %xmm1 ; CHECK-NEXT: ucomisd %xmm1, %xmm0 ; CHECK-NEXT: jne .LBB1_1 -; CHECK-NEXT: jnp .LBB1_2 -; CHECK-NEXT: .LBB1_1: # %bb1 -; CHECK-NEXT: addsd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: jp .LBB1_1 ; CHECK-NEXT: .LBB1_2: # %bb2 ; CHECK-NEXT: retq +; CHECK-NEXT: .LBB1_1: # %bb1 +; CHECK-NEXT: addsd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: jmp .LBB1_2 entry: %mul = fmul double %x, %y @@ -77,5 +76,32 @@ bb2: ret double %phi } -!1 = !{!"branch_weights", i32 1, i32 1000} +; Test if the negation of the non-equality check between floating points are +; translated to jnp followed by jne. +define void @foo(float %f) { +; CHECK-LABEL: foo: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: jne .LBB2_2 +; CHECK-NEXT: jnp .LBB2_1 +; CHECK-NEXT: .LBB2_2: # %if.then +; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: .LBB2_1: # %if.end +; CHECK-NEXT: retq +entry: + %cmp = fcmp une float %f, 0.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @a() + br label %if.end + +if.end: + ret void +} + +declare void @a() + +!1 = !{!"branch_weights", i32 1, i32 1000} |