summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp142
-rw-r--r--llvm/lib/Target/Hexagon/HexagonPatterns.td2
-rw-r--r--llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/setcc.ll6
-rw-r--r--llvm/test/CodeGen/X86/and-sink.ll4
-rw-r--r--llvm/test/CodeGen/X86/fold-rmw-ops.ll15
-rw-r--r--llvm/test/CodeGen/X86/or-branch.ll27
7 files changed, 90 insertions, 108 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3f99843406a..b3a8eeb7c97 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -415,7 +415,8 @@ namespace {
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
const SDLoc &DL);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
- const SDLoc &DL, bool foldBooleans = true);
+ const SDLoc &DL, bool foldBooleans);
+ SDValue rebuildSetCC(SDValue N);
bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
SDValue &CC) const;
@@ -7157,9 +7158,33 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
}
SDValue DAGCombiner::visitSETCC(SDNode *N) {
- return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
- cast<CondCodeSDNode>(N->getOperand(2))->get(),
- SDLoc(N));
+ // setcc is very commonly used as an argument to brcond. This pattern
+ // also lend itself to numerous combines and, as a result, it is desired
+ // we keep the argument to a brcond as a setcc as much as possible.
+ bool PreferSetCC =
+ N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
+
+ SDValue Combined = SimplifySetCC(
+ N->getValueType(0), N->getOperand(0), N->getOperand(1),
+ cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
+
+ if (!Combined)
+ return SDValue();
+
+ // If we prefer to have a setcc, and we don't, we'll try our best to
+ // recreate one using rebuildSetCC.
+ if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
+ SDValue NewSetCC = rebuildSetCC(Combined);
+
+ // We don't have anything interesting to combine to.
+ if (NewSetCC.getNode() == N)
+ return SDValue();
+
+ if (NewSetCC)
+ return NewSetCC;
+ }
+
+ return Combined;
}
SDValue DAGCombiner::visitSETCCE(SDNode *N) {
@@ -11151,16 +11176,22 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
N1.getOperand(0), N1.getOperand(1), N2);
}
- if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
- ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
- (N1.getOperand(0).hasOneUse() &&
- N1.getOperand(0).getOpcode() == ISD::SRL))) {
- SDNode *Trunc = nullptr;
- if (N1.getOpcode() == ISD::TRUNCATE) {
- // Look pass the truncate.
- Trunc = N1.getNode();
- N1 = N1.getOperand(0);
- }
+ if (N1.hasOneUse()) {
+ if (SDValue NewN1 = rebuildSetCC(N1))
+ return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::rebuildSetCC(SDValue N) {
+ if (N.getOpcode() == ISD::SRL ||
+ (N.getOpcode() == ISD::TRUNCATE &&
+ (N.getOperand(0).hasOneUse() &&
+ N.getOperand(0).getOpcode() == ISD::SRL))) {
+ // Look pass the truncate.
+ if (N.getOpcode() == ISD::TRUNCATE)
+ N = N.getOperand(0);
// Match this pattern so that we can generate simpler code:
//
@@ -11179,75 +11210,43 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
// This applies only when the AND constant value has one bit set and the
// SRL constant is equal to the log2 of the AND constant. The back-end is
// smart enough to convert the result into a TEST/JMP sequence.
- SDValue Op0 = N1.getOperand(0);
- SDValue Op1 = N1.getOperand(1);
+ SDValue Op0 = N.getOperand(0);
+ SDValue Op1 = N.getOperand(1);
- if (Op0.getOpcode() == ISD::AND &&
- Op1.getOpcode() == ISD::Constant) {
+ if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
SDValue AndOp1 = Op0.getOperand(1);
if (AndOp1.getOpcode() == ISD::Constant) {
const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
if (AndConst.isPowerOf2() &&
- cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+ cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
SDLoc DL(N);
- SDValue SetCC =
- DAG.getSetCC(DL,
- getSetCCResultType(Op0.getValueType()),
- Op0, DAG.getConstant(0, DL, Op0.getValueType()),
- ISD::SETNE);
-
- SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
- MVT::Other, Chain, SetCC, N2);
- // Don't add the new BRCond into the worklist or else SimplifySelectCC
- // will convert it back to (X & C1) >> C2.
- CombineTo(N, NewBRCond, false);
- // Truncate is dead.
- if (Trunc)
- deleteAndRecombine(Trunc);
- // Replace the uses of SRL with SETCC
- WorklistRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
- deleteAndRecombine(N1.getNode());
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, DL, Op0.getValueType()),
+ ISD::SETNE);
}
}
}
-
- if (Trunc)
- // Restore N1 if the above transformation doesn't match.
- N1 = N->getOperand(1);
}
// Transform br(xor(x, y)) -> br(x != y)
// Transform br(xor(xor(x,y), 1)) -> br (x == y)
- if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
- SDNode *TheXor = N1.getNode();
- SDValue Op0 = TheXor->getOperand(0);
- SDValue Op1 = TheXor->getOperand(1);
- if (Op0.getOpcode() == Op1.getOpcode()) {
- // Avoid missing important xor optimizations.
- if (SDValue Tmp = visitXOR(TheXor)) {
- if (Tmp.getNode() != TheXor) {
- DEBUG(dbgs() << "\nReplacing.8 ";
- TheXor->dump(&DAG);
- dbgs() << "\nWith: ";
- Tmp.getNode()->dump(&DAG);
- dbgs() << '\n');
- WorklistRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
- deleteAndRecombine(TheXor);
- return DAG.getNode(ISD::BRCOND, SDLoc(N),
- MVT::Other, Chain, Tmp, N2);
- }
+ if (N.getOpcode() == ISD::XOR) {
+ SDNode *TheXor = N.getNode();
- // visitXOR has changed XOR's operands or replaced the XOR completely,
- // bail out.
- return SDValue(N, 0);
- }
+ // Avoid missing important xor optimizations.
+ while (SDValue Tmp = visitXOR(TheXor)) {
+ // We don't have a XOR anymore, bail.
+ if (Tmp.getOpcode() != ISD::XOR)
+ return Tmp;
+
+ TheXor = Tmp.getNode();
}
+ SDValue Op0 = TheXor->getOperand(0);
+ SDValue Op1 = TheXor->getOperand(1);
+
if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
bool Equal = false;
if (isOneConstant(Op0) && Op0.hasOneUse() &&
@@ -11256,19 +11255,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
Equal = true;
}
- EVT SetCCVT = N1.getValueType();
+ EVT SetCCVT = N.getValueType();
if (LegalTypes)
SetCCVT = getSetCCResultType(SetCCVT);
- SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
- SetCCVT,
- Op0, Op1,
- Equal ? ISD::SETEQ : ISD::SETNE);
// Replace the uses of XOR with SETCC
- WorklistRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
- deleteAndRecombine(N1.getNode());
- return DAG.getNode(ISD::BRCOND, SDLoc(N),
- MVT::Other, Chain, SetCC, N2);
+ return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
+ Equal ? ISD::SETEQ : ISD::SETNE);
}
}
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index 70969e283c0..b0aaded4e98 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -2800,6 +2800,8 @@ def: Pat<(brcond (not I1:$Pu), bb:$dst),
(J2_jumpf I1:$Pu, bb:$dst)>;
def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst),
(J2_jumpf I1:$Pu, bb:$dst)>;
+def: Pat<(brcond (i1 (seteq I1:$Pu, 0)), bb:$dst),
+ (J2_jumpf I1:$Pu, bb:$dst)>;
def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst),
(J2_jumpt I1:$Pu, bb:$dst)>;
diff --git a/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll b/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
index 208d97feb64..789decb45f2 100644
--- a/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
+++ b/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
@@ -7,7 +7,6 @@ declare i1 @llvm.amdgcn.class.f32(float, i32)
; GCN-LABEL: {{^}}vcc_shrink_vcc_def:
; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
-; GCN: v_cndmask_b32_e64 v0, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @vcc_shrink_vcc_def(float %arg, i32 %arg1, float %arg2, i32 %arg3) {
bb0:
%tmp = icmp sgt i32 %arg1, 4
@@ -34,7 +33,6 @@ bb2:
; GCN-LABEL: {{^}}preserve_condition_undef_flag:
; GCN-NOT: vcc
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
-; GCN: v_cndmask_b32_e64 v0, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) {
bb0:
%tmp = icmp sgt i32 %arg1, 4
diff --git a/llvm/test/CodeGen/AMDGPU/setcc.ll b/llvm/test/CodeGen/AMDGPU/setcc.ll
index 442a3163270..f0a2b898195 100644
--- a/llvm/test/CodeGen/AMDGPU/setcc.ll
+++ b/llvm/test/CodeGen/AMDGPU/setcc.ll
@@ -397,9 +397,9 @@ endif:
}
; FUNC-LABEL: setcc-i1-and-xor
-; GCN-DAG: v_cmp_ge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
-; GCN-DAG: v_cmp_le_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
-; GCN: s_and_b64 s[2:3], [[A]], [[B]]
+; GCN-DAG: v_cmp_nge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
+; GCN-DAG: v_cmp_nle_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
+; GCN: s_or_b64 s[2:3], [[A]], [[B]]
define amdgpu_kernel void @setcc-i1-and-xor(i32 addrspace(1)* %out, float %cond) #0 {
bb0:
%tmp5 = fcmp oge float %cond, 0.000000e+00
diff --git a/llvm/test/CodeGen/X86/and-sink.ll b/llvm/test/CodeGen/X86/and-sink.ll
index 6d23d6cfb70..0a363667648 100644
--- a/llvm/test/CodeGen/X86/and-sink.ll
+++ b/llvm/test/CodeGen/X86/and-sink.ll
@@ -14,8 +14,8 @@ define i32 @and_sink1(i32 %a, i1 %c) {
; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: # %bb.1: # %bb0
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl $0, A
; CHECK-NEXT: testb $4, %al
+; CHECK-NEXT: movl $0, A
; CHECK-NEXT: jne .LBB0_3
; CHECK-NEXT: # %bb.2: # %bb1
; CHECK-NEXT: movl $1, %eax
@@ -61,8 +61,8 @@ define i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
; CHECK-NEXT: je .LBB1_5
; CHECK-NEXT: # %bb.3: # %bb1
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: movl $0, C
; CHECK-NEXT: testb $4, %cl
+; CHECK-NEXT: movl $0, C
; CHECK-NEXT: jne .LBB1_2
; CHECK-NEXT: # %bb.4: # %bb2
; CHECK-NEXT: movl $1, %eax
diff --git a/llvm/test/CodeGen/X86/fold-rmw-ops.ll b/llvm/test/CodeGen/X86/fold-rmw-ops.ll
index bb89d4b54ea..c524486d2c7 100644
--- a/llvm/test/CodeGen/X86/fold-rmw-ops.ll
+++ b/llvm/test/CodeGen/X86/fold-rmw-ops.ll
@@ -1146,12 +1146,9 @@ b:
define void @and32_imm_br() nounwind {
; CHECK-LABEL: and32_imm_br:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movl $-2147483648, %eax # encoding: [0xb8,0x00,0x00,0x00,0x80]
+; CHECK-NEXT: andl $-2147483648, {{.*}}(%rip) # encoding: [0x81,0x25,A,A,A,A,0x00,0x00,0x00,0x80]
+; CHECK-NEXT: # fixup A - offset: 2, value: g32-8, kind: reloc_riprel_4byte
; CHECK-NEXT: # imm = 0x80000000
-; CHECK-NEXT: andl {{.*}}(%rip), %eax # encoding: [0x23,0x05,A,A,A,A]
-; CHECK-NEXT: # fixup A - offset: 2, value: g32-4, kind: reloc_riprel_4byte
-; CHECK-NEXT: movl %eax, {{.*}}(%rip) # encoding: [0x89,0x05,A,A,A,A]
-; CHECK-NEXT: # fixup A - offset: 2, value: g32-4, kind: reloc_riprel_4byte
; CHECK-NEXT: jne .LBB35_2 # encoding: [0x75,A]
; CHECK-NEXT: # fixup A - offset: 1, value: .LBB35_2-1, kind: FK_PCRel_1
; CHECK-NEXT: # %bb.1: # %a
@@ -1244,13 +1241,9 @@ b:
define void @and16_imm_br() nounwind {
; CHECK-LABEL: and16_imm_br:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movzwl {{.*}}(%rip), %eax # encoding: [0x0f,0xb7,0x05,A,A,A,A]
-; CHECK-NEXT: # fixup A - offset: 3, value: g16-4, kind: reloc_riprel_4byte
-; CHECK-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
+; CHECK-NEXT: andw $-32768, {{.*}}(%rip) # encoding: [0x66,0x81,0x25,A,A,A,A,0x00,0x80]
+; CHECK-NEXT: # fixup A - offset: 3, value: g16-6, kind: reloc_riprel_4byte
; CHECK-NEXT: # imm = 0x8000
-; CHECK-NEXT: movw %ax, {{.*}}(%rip) # encoding: [0x66,0x89,0x05,A,A,A,A]
-; CHECK-NEXT: # fixup A - offset: 3, value: g16-4, kind: reloc_riprel_4byte
-; CHECK-NEXT: testw %ax, %ax # encoding: [0x66,0x85,0xc0]
; CHECK-NEXT: jne .LBB38_2 # encoding: [0x75,A]
; CHECK-NEXT: # fixup A - offset: 1, value: .LBB38_2-1, kind: FK_PCRel_1
; CHECK-NEXT: # %bb.1: # %a
diff --git a/llvm/test/CodeGen/X86/or-branch.ll b/llvm/test/CodeGen/X86/or-branch.ll
index 276258a3d40..a1a29cf744f 100644
--- a/llvm/test/CodeGen/X86/or-branch.ll
+++ b/llvm/test/CodeGen/X86/or-branch.ll
@@ -19,11 +19,10 @@ define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind {
; JUMP1-LABEL: foo:
; JUMP1: # %bb.0: # %entry
; JUMP1-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; JUMP1-NEXT: sete %al
-; JUMP1-NEXT: cmpl $5, {{[0-9]+}}(%esp)
-; JUMP1-NEXT: setl %cl
-; JUMP1-NEXT: orb %al, %cl
-; JUMP1-NEXT: cmpb $1, %cl
+; JUMP1-NEXT: setne %al
+; JUMP1-NEXT: cmpl $4, {{[0-9]+}}(%esp)
+; JUMP1-NEXT: setg %cl
+; JUMP1-NEXT: testb %al, %cl
; JUMP1-NEXT: jne .LBB0_1
; JUMP1-NEXT: # %bb.2: # %cond_true
; JUMP1-NEXT: jmp bar # TAILCALL
@@ -50,11 +49,10 @@ define void @unpredictable(i32 %X, i32 %Y, i32 %Z) nounwind {
; JUMP2-LABEL: unpredictable:
; JUMP2: # %bb.0: # %entry
; JUMP2-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; JUMP2-NEXT: sete %al
-; JUMP2-NEXT: cmpl $5, {{[0-9]+}}(%esp)
-; JUMP2-NEXT: setl %cl
-; JUMP2-NEXT: orb %al, %cl
-; JUMP2-NEXT: cmpb $1, %cl
+; JUMP2-NEXT: setne %al
+; JUMP2-NEXT: cmpl $4, {{[0-9]+}}(%esp)
+; JUMP2-NEXT: setg %cl
+; JUMP2-NEXT: testb %al, %cl
; JUMP2-NEXT: jne .LBB1_1
; JUMP2-NEXT: # %bb.2: # %cond_true
; JUMP2-NEXT: jmp bar # TAILCALL
@@ -64,11 +62,10 @@ define void @unpredictable(i32 %X, i32 %Y, i32 %Z) nounwind {
; JUMP1-LABEL: unpredictable:
; JUMP1: # %bb.0: # %entry
; JUMP1-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; JUMP1-NEXT: sete %al
-; JUMP1-NEXT: cmpl $5, {{[0-9]+}}(%esp)
-; JUMP1-NEXT: setl %cl
-; JUMP1-NEXT: orb %al, %cl
-; JUMP1-NEXT: cmpb $1, %cl
+; JUMP1-NEXT: setne %al
+; JUMP1-NEXT: cmpl $4, {{[0-9]+}}(%esp)
+; JUMP1-NEXT: setg %cl
+; JUMP1-NEXT: testb %al, %cl
; JUMP1-NEXT: jne .LBB1_1
; JUMP1-NEXT: # %bb.2: # %cond_true
; JUMP1-NEXT: jmp bar # TAILCALL
OpenPOWER on IntegriCloud