summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/include/llvm/Target/TargetInstrInfo.h5
-rw-r--r--llvm/lib/CodeGen/TailDuplication.cpp6
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp93
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.h96
-rw-r--r--llvm/test/CodeGen/X86/block-placement.ll38
-rw-r--r--llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll5
-rw-r--r--llvm/test/CodeGen/X86/fast-isel-cmp-branch3.ll5
-rw-r--r--llvm/test/CodeGen/X86/fp-une-cmp.ll38
8 files changed, 179 insertions, 107 deletions
diff --git a/llvm/include/llvm/Target/TargetInstrInfo.h b/llvm/include/llvm/Target/TargetInstrInfo.h
index 28ca307ec9a..05ba3ba5025 100644
--- a/llvm/include/llvm/Target/TargetInstrInfo.h
+++ b/llvm/include/llvm/Target/TargetInstrInfo.h
@@ -452,6 +452,8 @@ public:
/// If AllowModify is true, then this routine is allowed to modify the basic
/// block (e.g. delete instructions after the unconditional branch).
///
+ /// The CFG information in MBB.Predecessors and MBB.Successors must be valid
+ /// before calling this function.
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
@@ -521,6 +523,9 @@ public:
/// cases where AnalyzeBranch doesn't apply because there was no original
/// branch to analyze. At least this much must be implemented, else tail
/// merging needs to be disabled.
+ ///
+ /// The CFG information in MBB.Predecessors and MBB.Successors must be valid
+ /// before calling this function.
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp
index da593ce3717..2a8431c2bab 100644
--- a/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/llvm/lib/CodeGen/TailDuplication.cpp
@@ -749,9 +749,6 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
TII->RemoveBranch(*PredBB);
- if (PredTBB)
- TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
-
if (!PredBB->isSuccessor(NewTarget))
PredBB->replaceSuccessor(TailBB, NewTarget);
else {
@@ -759,6 +756,9 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
assert(PredBB->succ_size() <= 1);
}
+ if (PredTBB)
+ TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+
TDBBs.push_back(PredBB);
}
return Changed;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index dbfa8aca8ac..d61ae4d7bab 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3807,6 +3807,8 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
case X86::COND_NP: return X86::COND_P;
case X86::COND_O: return X86::COND_NO;
case X86::COND_NO: return X86::COND_O;
+ case X86::COND_NE_OR_P: return X86::COND_E_AND_NP;
+ case X86::COND_E_AND_NP: return X86::COND_NE_OR_P;
}
}
@@ -3914,6 +3916,23 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
return !isPredicated(MI);
}
+// Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may not
+// be a fallthrough MBB now due to layout changes). Return nullptr if the
+// fallthrough MBB cannot be identified.
+static MachineBasicBlock *getFallThroughMBB(MachineBasicBlock *MBB,
+ MachineBasicBlock *TBB) {
+ MachineBasicBlock *FallthroughBB = nullptr;
+ for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) {
+ if ((*SI)->isEHPad() || *SI == TBB)
+ continue;
+ // Return a nullptr if we found more than one fallthrough successor.
+ if (FallthroughBB)
+ return nullptr;
+ FallthroughBB = *SI;
+ }
+ return FallthroughBB;
+}
+
bool X86InstrInfo::AnalyzeBranchImpl(
MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
@@ -4026,30 +4045,45 @@ bool X86InstrInfo::AnalyzeBranchImpl(
assert(Cond.size() == 1);
assert(TBB);
- // Only handle the case where all conditional branches branch to the same
- // destination.
- if (TBB != I->getOperand(0).getMBB())
- return true;
-
// If the conditions are the same, we can leave them alone.
X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
- if (OldBranchCode == BranchCode)
+ auto NewTBB = I->getOperand(0).getMBB();
+ if (OldBranchCode == BranchCode && TBB == NewTBB)
continue;
// If they differ, see if they fit one of the known patterns. Theoretically,
// we could handle more patterns here, but we shouldn't expect to see them
// if instruction selection has done a reasonable job.
- if ((OldBranchCode == X86::COND_NP &&
- BranchCode == X86::COND_E) ||
- (OldBranchCode == X86::COND_E &&
- BranchCode == X86::COND_NP))
- BranchCode = X86::COND_NP_OR_E;
- else if ((OldBranchCode == X86::COND_P &&
- BranchCode == X86::COND_NE) ||
- (OldBranchCode == X86::COND_NE &&
- BranchCode == X86::COND_P))
+ if (TBB == NewTBB &&
+ ((OldBranchCode == X86::COND_P && BranchCode == X86::COND_NE) ||
+ (OldBranchCode == X86::COND_NE && BranchCode == X86::COND_P))) {
BranchCode = X86::COND_NE_OR_P;
- else
+ } else if ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_NE) ||
+ (OldBranchCode == X86::COND_E && BranchCode == X86::COND_P)) {
+ if (NewTBB != (FBB ? FBB : getFallThroughMBB(&MBB, TBB)))
+ return true;
+
+ // X86::COND_E_AND_NP usually has two different branch destinations.
+ //
+ // JP B1
+ // JE B2
+ // JMP B1
+ // B1:
+ // B2:
+ //
+ // Here this condition branches to B2 only if NP && E. It has another
+ // equivalent form:
+ //
+ // JNE B1
+ // JNP B2
+ // JMP B1
+ // B1:
+ // B2:
+ //
+ // Similarly it branches to B2 only if E && NP. That is why this condition
+ // is named COND_E_AND_NP.
+ BranchCode = X86::COND_E_AND_NP;
+ } else
return true;
// Update the MachineOperand.
@@ -4174,17 +4208,13 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
return 1;
}
+ // If FBB is null, it is implied to be a fall-through block.
+ bool FallThru = FBB == nullptr;
+
// Conditional branch.
unsigned Count = 0;
X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
switch (CC) {
- case X86::COND_NP_OR_E:
- // Synthesize NP_OR_E with two branches.
- BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(TBB);
- ++Count;
- BuildMI(&MBB, DL, get(X86::JE_1)).addMBB(TBB);
- ++Count;
- break;
case X86::COND_NE_OR_P:
// Synthesize NE_OR_P with two branches.
BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(TBB);
@@ -4192,13 +4222,26 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
BuildMI(&MBB, DL, get(X86::JP_1)).addMBB(TBB);
++Count;
break;
+ case X86::COND_E_AND_NP:
+ // If FBB is null, recover it as MBB's fall-through successor.
+ if (FBB == nullptr) {
+ FBB = getFallThroughMBB(&MBB, TBB);
+ assert(FBB && "MBB cannot be the last block in function when the false "
+ "body is a fall-through.");
+ }
+ // Synthesize COND_E_AND_NP with two branches.
+ BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(FBB);
+ ++Count;
+ BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(TBB);
+ ++Count;
+ break;
default: {
unsigned Opc = GetCondBranchFromCond(CC);
BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
++Count;
}
}
- if (FBB) {
+ if (!FallThru) {
// Two-way Conditional branch. Insert the second branch.
BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(FBB);
++Count;
@@ -6759,8 +6802,6 @@ bool X86InstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 1 && "Invalid X86 branch condition!");
X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
- if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E)
- return true;
Cond[0].setImm(GetOppositeBranchCondition(CC));
return false;
}
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 3e3f2af7641..7439fa2f740 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -29,54 +29,54 @@ namespace llvm {
namespace X86 {
// X86 specific condition code. These correspond to X86_*_COND in
// X86InstrInfo.td. They must be kept in synch.
- enum CondCode {
- COND_A = 0,
- COND_AE = 1,
- COND_B = 2,
- COND_BE = 3,
- COND_E = 4,
- COND_G = 5,
- COND_GE = 6,
- COND_L = 7,
- COND_LE = 8,
- COND_NE = 9,
- COND_NO = 10,
- COND_NP = 11,
- COND_NS = 12,
- COND_O = 13,
- COND_P = 14,
- COND_S = 15,
- LAST_VALID_COND = COND_S,
-
- // Artificial condition codes. These are used by AnalyzeBranch
- // to indicate a block terminated with two conditional branches to
- // the same location. This occurs in code using FCMP_OEQ or FCMP_UNE,
- // which can't be represented on x86 with a single condition. These
- // are never used in MachineInstrs.
- COND_NE_OR_P,
- COND_NP_OR_E,
-
- COND_INVALID
- };
-
- // Turn condition code into conditional branch opcode.
- unsigned GetCondBranchFromCond(CondCode CC);
-
- /// \brief Return a set opcode for the given condition and whether it has
- /// a memory operand.
- unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false);
-
- /// \brief Return a cmov opcode for the given condition, register size in
- /// bytes, and operand type.
- unsigned getCMovFromCond(CondCode CC, unsigned RegBytes,
- bool HasMemoryOperand = false);
-
- // Turn CMov opcode into condition code.
- CondCode getCondFromCMovOpc(unsigned Opc);
-
- /// GetOppositeBranchCondition - Return the inverse of the specified cond,
- /// e.g. turning COND_E to COND_NE.
- CondCode GetOppositeBranchCondition(CondCode CC);
+enum CondCode {
+ COND_A = 0,
+ COND_AE = 1,
+ COND_B = 2,
+ COND_BE = 3,
+ COND_E = 4,
+ COND_G = 5,
+ COND_GE = 6,
+ COND_L = 7,
+ COND_LE = 8,
+ COND_NE = 9,
+ COND_NO = 10,
+ COND_NP = 11,
+ COND_NS = 12,
+ COND_O = 13,
+ COND_P = 14,
+ COND_S = 15,
+ LAST_VALID_COND = COND_S,
+
+ // Artificial condition codes. These are used by AnalyzeBranch
+ // to indicate a block terminated with two conditional branches that together
+ // form a compound condition. They occur in code using FCMP_OEQ or FCMP_UNE,
+ // which can't be represented on x86 with a single condition. These
+ // are never used in MachineInstrs and are inverses of one another.
+ COND_NE_OR_P,
+ COND_E_AND_NP,
+
+ COND_INVALID
+};
+
+// Turn condition code into conditional branch opcode.
+unsigned GetCondBranchFromCond(CondCode CC);
+
+/// \brief Return a set opcode for the given condition and whether it has
+/// a memory operand.
+unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false);
+
+/// \brief Return a cmov opcode for the given condition, register size in
+/// bytes, and operand type.
+unsigned getCMovFromCond(CondCode CC, unsigned RegBytes,
+ bool HasMemoryOperand = false);
+
+// Turn CMov opcode into condition code.
+CondCode getCondFromCMovOpc(unsigned Opc);
+
+/// GetOppositeBranchCondition - Return the inverse of the specified cond,
+/// e.g. turning COND_E to COND_NE.
+CondCode GetOppositeBranchCondition(CondCode CC);
} // end namespace X86;
diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll
index 98d37153876..fd389b5f145 100644
--- a/llvm/test/CodeGen/X86/block-placement.ll
+++ b/llvm/test/CodeGen/X86/block-placement.ll
@@ -463,26 +463,24 @@ exit:
}
define void @fpcmp_unanalyzable_branch(i1 %cond) {
-; This function's CFG contains an unanalyzable branch that is likely to be
-; split due to having a different high-probability predecessor.
-; CHECK: fpcmp_unanalyzable_branch
-; CHECK: %entry
-; CHECK: %exit
-; CHECK-NOT: %if.then
-; CHECK-NOT: %if.end
-; CHECK-NOT: jne
-; CHECK-NOT: jnp
-; CHECK: jne
-; CHECK-NEXT: jnp
-; CHECK-NEXT: %if.then
+; This function's CFG contains a once-unanalyzable branch (une on floating
+; points). Now that it is analyzable, we should get the best layout, in which
+; each edge in 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end'
+; is fall-through.
+; CHECK-LABEL: fpcmp_unanalyzable_branch:
+; CHECK: # BB#0: # %entry
+; CHECK: # BB#1: # %entry.if.then_crit_edge
+; CHECK: .LBB10_4: # %if.then
+; CHECK: .LBB10_5: # %if.end
+; CHECK: # BB#3: # %exit
+; CHECK: jne .LBB10_4
+; CHECK-NEXT: jnp .LBB10_5
+; CHECK-NEXT: jmp .LBB10_4
entry:
; Note that this branch must be strongly biased toward
; 'entry.if.then_crit_edge' to ensure that we would try to form a chain for
-; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then'. It is the last edge in that
-; chain which would violate the unanalyzable branch in 'exit', but we won't even
-; try this trick unless 'if.then' is believed to almost always be reached from
-; 'entry.if.then_crit_edge'.
+; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end'.
br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1
entry.if.then_crit_edge:
@@ -494,7 +492,7 @@ lor.lhs.false:
exit:
%cmp.i = fcmp une double 0.000000e+00, undef
- br i1 %cmp.i, label %if.then, label %if.end
+ br i1 %cmp.i, label %if.then, label %if.end, !prof !3
if.then:
%0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ]
@@ -507,6 +505,7 @@ if.end:
}
!1 = !{!"branch_weights", i32 1000, i32 1}
+!3 = !{!"branch_weights", i32 1, i32 1000}
declare i32 @f()
declare i32 @g()
@@ -665,11 +664,14 @@ define void @unanalyzable_branch_to_best_succ(i1 %cond) {
; Ensure that we can handle unanalyzable branches where the destination block
; gets selected as the optimal successor to merge.
;
+; This branch is now analyzable and hence the destination block becomes the
+; hotter one. The right order is entry->bar->exit->foo.
+;
; CHECK: unanalyzable_branch_to_best_succ
; CHECK: %entry
-; CHECK: %foo
; CHECK: %bar
; CHECK: %exit
+; CHECK: %foo
entry:
; Bias this branch toward bar to ensure we form that chain.
diff --git a/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll b/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll
index 04dbac07690..475d8fcf7f3 100644
--- a/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-cmp-branch2.ll
@@ -5,7 +5,7 @@ define i32 @fcmp_oeq(float %x, float %y) {
; CHECK-LABEL: fcmp_oeq
; CHECK: ucomiss %xmm1, %xmm0
; CHECK-NEXT: jne {{LBB.+_1}}
-; CHECK-NEXT: jnp {{LBB.+_2}}
+; CHECK-NEXT: jp {{LBB.+_1}}
%1 = fcmp oeq float %x, %y
br i1 %1, label %bb1, label %bb2
bb2:
@@ -162,8 +162,7 @@ define i32 @fcmp_une(float %x, float %y) {
; CHECK-LABEL: fcmp_une
; CHECK: ucomiss %xmm1, %xmm0
; CHECK-NEXT: jne {{LBB.+_2}}
-; CHECK-NEXT: jp {{LBB.+_2}}
-; CHECK-NEXT: jmp {{LBB.+_1}}
+; CHECK-NEXT: jnp {{LBB.+_1}}
%1 = fcmp une float %x, %y
br i1 %1, label %bb1, label %bb2
bb2:
diff --git a/llvm/test/CodeGen/X86/fast-isel-cmp-branch3.ll b/llvm/test/CodeGen/X86/fast-isel-cmp-branch3.ll
index e54d0ca4007..8f09b2e3835 100644
--- a/llvm/test/CodeGen/X86/fast-isel-cmp-branch3.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-cmp-branch3.ll
@@ -17,7 +17,7 @@ define i32 @fcmp_oeq2(float %x) {
; CHECK: xorps %xmm1, %xmm1
; CHECK-NEXT: ucomiss %xmm1, %xmm0
; CHECK-NEXT: jne {{LBB.+_1}}
-; CHECK-NEXT: jnp {{LBB.+_2}}
+; CHECK-NEXT: jp {{LBB.+_1}}
%1 = fcmp oeq float %x, 0.000000e+00
br i1 %1, label %bb1, label %bb2
bb2:
@@ -338,8 +338,7 @@ define i32 @fcmp_une2(float %x) {
; CHECK: xorps %xmm1, %xmm1
; CHECK-NEXT: ucomiss %xmm1, %xmm0
; CHECK-NEXT: jne {{LBB.+_2}}
-; CHECK-NEXT: jp {{LBB.+_2}}
-; CHECK-NEXT: jmp {{LBB.+_1}}
+; CHECK-NEXT: jnp {{LBB.+_1}}
%1 = fcmp une float %x, 0.000000e+00
br i1 %1, label %bb1, label %bb2
bb2:
diff --git a/llvm/test/CodeGen/X86/fp-une-cmp.ll b/llvm/test/CodeGen/X86/fp-une-cmp.ll
index 9fab5c4dc83..179fffca379 100644
--- a/llvm/test/CodeGen/X86/fp-une-cmp.ll
+++ b/llvm/test/CodeGen/X86/fp-une-cmp.ll
@@ -48,8 +48,6 @@ bb2:
ret double %phi
}
-; FIXME: With branch weights indicated, bb2 should be placed ahead of bb1.
-
define double @profile_metadata(double %x, double %y) {
; CHECK-LABEL: profile_metadata:
; CHECK: # BB#0: # %entry
@@ -57,11 +55,12 @@ define double @profile_metadata(double %x, double %y) {
; CHECK-NEXT: xorpd %xmm1, %xmm1
; CHECK-NEXT: ucomisd %xmm1, %xmm0
; CHECK-NEXT: jne .LBB1_1
-; CHECK-NEXT: jnp .LBB1_2
-; CHECK-NEXT: .LBB1_1: # %bb1
-; CHECK-NEXT: addsd {{.*}}(%rip), %xmm0
+; CHECK-NEXT: jp .LBB1_1
; CHECK-NEXT: .LBB1_2: # %bb2
; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB1_1: # %bb1
+; CHECK-NEXT: addsd {{.*}}(%rip), %xmm0
+; CHECK-NEXT: jmp .LBB1_2
entry:
%mul = fmul double %x, %y
@@ -77,5 +76,32 @@ bb2:
ret double %phi
}
-!1 = !{!"branch_weights", i32 1, i32 1000}
+; Test that the negation of the non-equality check between floating points is
+; translated to jne followed by jnp.
+define void @foo(float %f) {
+; CHECK-LABEL: foo:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: jne .LBB2_2
+; CHECK-NEXT: jnp .LBB2_1
+; CHECK-NEXT: .LBB2_2: # %if.then
+; CHECK-NEXT: jmp a # TAILCALL
+; CHECK-NEXT: .LBB2_1: # %if.end
+; CHECK-NEXT: retq
+entry:
+ %cmp = fcmp une float %f, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ tail call void @a()
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+declare void @a()
+
+!1 = !{!"branch_weights", i32 1, i32 1000}
OpenPOWER on IntegriCloud