summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/Target/TargetLowering.h19
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp63
-rw-r--r--llvm/test/CodeGen/ARM/movcc-double.ll50
-rw-r--r--llvm/test/CodeGen/R600/or.ll6
-rw-r--r--llvm/test/CodeGen/X86/cmov-double.ll52
-rw-r--r--llvm/test/CodeGen/X86/jump_sign.ll14
-rw-r--r--llvm/test/CodeGen/X86/zext-sext.ll7
7 files changed, 197 insertions, 14 deletions
diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h
index a56fdf9f222..85b6e954dcd 100644
--- a/llvm/include/llvm/Target/TargetLowering.h
+++ b/llvm/include/llvm/Target/TargetLowering.h
@@ -1097,6 +1097,25 @@ public:
virtual LoadInst *lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
return nullptr;
}
+
+ /// Returns true if we should normalize
+ /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
+ /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
+ /// that it saves us from materializing N0 and N1 in an integer register.
+ /// Targets that are able to perform and/or on flags should return false here.
+ virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
+ EVT VT) const {
+ // If a target has multiple condition registers, then it likely has logical
+ // operations on those registers.
+ if (hasMultipleConditionRegisters())
+ return false;
+ // Only do the transform if the value won't be split into multiple
+ // registers.
+ LegalizeTypeAction Action = getTypeAction(Context, VT);
+ return Action != TypeExpandInteger && Action != TypeExpandFloat &&
+ Action != TypeSplitVector;
+ }
+
//===--------------------------------------------------------------------===//
// TargetLowering Configuration Methods - These methods should be invoked by
// the derived class constructor to configure this object for the target.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e247961a7ba..64228a1aa9b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4819,6 +4819,69 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return SimplifySelect(SDLoc(N), N0, N1, N2);
}
+ if (VT0 == MVT::i1) {
+ if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ // select (and Cond0, Cond1), X, Y
+ // -> select Cond0, (select Cond1, X, Y), Y
+ if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
+ InnerSelect, N2);
+ }
+ // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
+ if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
+ InnerSelect);
+ }
+ }
+
+ // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
+ if (N1->getOpcode() == ISD::SELECT) {
+ SDValue N1_0 = N1->getOperand(0);
+ SDValue N1_1 = N1->getOperand(1);
+ SDValue N1_2 = N1->getOperand(2);
+ if (N1_2 == N2) {
+ // Create the actual and node if we can generate good code for it.
+ if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
+ N0, N1_0);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
+ N1_1, N2);
+ }
+ // Otherwise see if we can optimize the "and" to a better pattern.
+ if (SDValue Combined = visitANDLike(N0, N1_0, N))
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+ N1_1, N2);
+ }
+ }
+ // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
+ if (N2->getOpcode() == ISD::SELECT) {
+ SDValue N2_0 = N2->getOperand(0);
+ SDValue N2_1 = N2->getOperand(1);
+ SDValue N2_2 = N2->getOperand(2);
+ if (N2_1 == N1) {
+ // Create the actual or node if we can generate good code for it.
+ if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
+ N0, N2_0);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
+ N1, N2_2);
+ }
+ // Otherwise see if we can optimize to a better pattern.
+ if (SDValue Combined = visitORLike(N0, N2_0, N))
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+ N1, N2_2);
+ }
+ }
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/ARM/movcc-double.ll b/llvm/test/CodeGen/ARM/movcc-double.ll
new file mode 100644
index 00000000000..9ce708d9bd3
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/movcc-double.ll
@@ -0,0 +1,50 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple = "arm-unknown-unknown"
+
+; select with and i1/or i1 condition should be implemented as a series of 2
+; cmovs, not by producing two conditions and using and on them.
+
+define i32 @select_and(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
+; CHECK-LABEL: select_and
+; CHECK-NOT: tst
+; CHECK-NOT: movne
+; CHECK: mov{{lo|hs}}
+; CHECK: mov{{lo|hs}}
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = icmp ult i32 %a2, %a3
+ %and = and i1 %cmp0, %cmp1
+ %res = select i1 %and, i32 %a4, i32 %a5
+ ret i32 %res
+}
+
+define i32 @select_or(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
+; select with or i1 condition should be implemented as a series of 2 cmovs, not
+; by producing two conditions and using or on them.
+; CHECK-LABEL: select_or
+; CHECK-NOT: orrs
+; CHECK-NOT: tst
+; CHECK: mov{{lo|hs}}
+; CHECK: mov{{lo|hs}}
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = icmp ult i32 %a2, %a3
+ %and = or i1 %cmp0, %cmp1
+ %res = select i1 %and, i32 %a4, i32 %a5
+ ret i32 %res
+}
+
+; If one of the conditions is materialized as a 0/1 value anyway, then the
+; sequence of 2 cmovs should not be used.
+
+@var32 = global i32 0
+define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: select_noopt
+; CHECK: orrs
+; CHECK: movne
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = icmp ult i32 %a1, %a2
+ %or = or i1 %cmp0, %cmp1
+ %zero_one = zext i1 %or to i32
+ store volatile i32 %zero_one, i32* @var32
+ %res = select i1 %or, i32 %a3, i32 %a4
+ ret i32 %res
+}
diff --git a/llvm/test/CodeGen/R600/or.ll b/llvm/test/CodeGen/R600/or.ll
index 1b1cb9a83cb..1337adb7b45 100644
--- a/llvm/test/CodeGen/R600/or.ll
+++ b/llvm/test/CodeGen/R600/or.ll
@@ -156,14 +156,14 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
-define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
+define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
%a = load float, float addrspace(1)* %in0
%b = load float, float addrspace(1)* %in1
%acmp = fcmp oge float %a, 0.000000e+00
%bcmp = fcmp oge float %b, 0.000000e+00
%or = or i1 %acmp, %bcmp
- %result = select i1 %or, float %a, float %b
- store float %result, float addrspace(1)* %out
+ %result = zext i1 %or to i32
+ store i32 %result, i32 addrspace(1)* %out
ret void
}
diff --git a/llvm/test/CodeGen/X86/cmov-double.ll b/llvm/test/CodeGen/X86/cmov-double.ll
new file mode 100644
index 00000000000..994a027596c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/cmov-double.ll
@@ -0,0 +1,52 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple = "x86_64-unknown-unknown"
+
+; select with and i1/or i1 condition should be implemented as a series of 2
+; cmovs, not by producing two conditions and using and on them.
+
+define i32 @select_and(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
+; CHECK-LABEL: select_and
+; CHECK-NOT: set
+; CHECK-NOT: and[lb]
+; CHECK-NOT: test
+; CHECK: cmov
+; CHECK: cmov
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = fcmp olt float %a2, %a3
+ %and = and i1 %cmp0, %cmp1
+ %res = select i1 %and, i32 %a4, i32 %a5
+ ret i32 %res
+}
+
+define i32 @select_or(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
+; select with or i1 condition should be implemented as a series of 2 cmovs, not
+; by producing two conditions and using or on them.
+; CHECK-LABEL: select_or
+; CHECK-NOT: set
+; CHECK-NOT: or[lb]
+; CHECK-NOT: test
+; CHECK: cmov
+; CHECK: cmov
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = fcmp olt float %a2, %a3
+ %and = or i1 %cmp0, %cmp1
+ %res = select i1 %and, i32 %a4, i32 %a5
+ ret i32 %res
+}
+
+; If one of the conditions is materialized as a 0/1 value anyway, then the
+; sequence of 2 cmovs should not be used.
+
+@var32 = global i32 0
+define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: select_noopt
+; CHECK: cmov
+; CHECK-NOT: cmov
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = icmp ult i32 %a1, %a2
+ %or = or i1 %cmp0, %cmp1
+ %zero_one = zext i1 %or to i32
+ store volatile i32 %zero_one, i32* @var32
+ %res = select i1 %or, i32 %a3, i32 %a4
+ ret i32 %res
+}
diff --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll
index 440f1cc9b49..31a7af31790 100644
--- a/llvm/test/CodeGen/X86/jump_sign.ll
+++ b/llvm/test/CodeGen/X86/jump_sign.ll
@@ -217,17 +217,15 @@ entry:
; PR13475
; If we have sub a, b and cmp b, a and the result of cmp is used
; by sbb, we should not optimize cmp away.
-define i32 @func_q(i32 %j.4, i32 %w, i32 %el) {
+define i32 @func_q(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-LABEL: func_q:
; CHECK: cmp
; CHECK-NEXT: sbb
- %tmp532 = add i32 %j.4, %w
- %tmp533 = icmp ugt i32 %tmp532, %el
- %tmp534 = icmp ult i32 %w, %el
- %or.cond = and i1 %tmp533, %tmp534
- %tmp535 = sub i32 %el, %w
- %j.5 = select i1 %or.cond, i32 %tmp535, i32 %j.4
- ret i32 %j.5
+ %1 = icmp ult i32 %a0, %a1
+ %2 = sub i32 %a1, %a0
+ %3 = select i1 %1, i32 -1, i32 0
+ %4 = xor i32 %2, %3
+ ret i32 %4
}
; rdar://11873276
define i8* @func_r(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
diff --git a/llvm/test/CodeGen/X86/zext-sext.ll b/llvm/test/CodeGen/X86/zext-sext.ll
index 2758bff8024..01f871159d3 100644
--- a/llvm/test/CodeGen/X86/zext-sext.ll
+++ b/llvm/test/CodeGen/X86/zext-sext.ll
@@ -34,11 +34,12 @@ entry:
%tmp12 = add i64 %tmp11, 5089792279245435153
; CHECK: addl $2138875574, %e[[REGISTER_zext:[a-z0-9]+]]
-; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
; CHECK: cmpl $-8608074, %e[[REGISTER_zext]]
+; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
; CHECK-NOT: [[REGISTER_zext]]
-; CHECK-DAG: testl %e[[REGISTER_zext]]
-; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext]]
+; CHECK-DAG: cmpl $2138875573, %e[[REGISTER_zext]]
+; CHECK: movq [[REGISTER_sext]], [[REGISTER_sext2:%[a-z0-9]+]]
+; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext2]]
%tmp13 = sub i64 %tmp12, 2138875574
%tmp14 = zext i32 %tmp4 to i64
OpenPOWER on IntegriCloud