7 files changed, 197 insertions, 14 deletions
diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h
index a56fdf9f222..85b6e954dcd 100644
--- a/llvm/include/llvm/Target/TargetLowering.h
+++ b/llvm/include/llvm/Target/TargetLowering.h
@@ -1097,6 +1097,25 @@ public:
   virtual LoadInst *lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
     return nullptr;
   }
+
+  /// Returns true if we should normalize
+  /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
+  /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
+  /// that it saves us from materializing N0 and N1 in an integer register.
+  /// Targets that are able to perform and/or on flags should return false here.
+  virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
+                                               EVT VT) const {
+    // If a target has multiple condition registers, then it likely has logical
+    // operations on those registers.
+    if (hasMultipleConditionRegisters())
+      return false;
+    // Only do the transform if the value won't be split into multiple
+    // registers.
+    LegalizeTypeAction Action = getTypeAction(Context, VT);
+    return Action != TypeExpandInteger && Action != TypeExpandFloat &&
+      Action != TypeSplitVector;
+  }
+
   //===--------------------------------------------------------------------===//
   // TargetLowering Configuration Methods - These methods should be invoked by
   // the derived class constructor to configure this object for the target.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e247961a7ba..64228a1aa9b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4819,6 +4819,69 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
     return SimplifySelect(SDLoc(N), N0, N1, N2);
   }
 
+  if (VT0 == MVT::i1) {
+    if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+      // select (and Cond0, Cond1), X, Y
+      //   -> select Cond0, (select Cond1, X, Y), Y
+      if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
+        SDValue Cond0 = N0->getOperand(0);
+        SDValue Cond1 = N0->getOperand(1);
+        SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+                                          N1.getValueType(), Cond1, N1, N2);
+        return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
+                           InnerSelect, N2);
+      }
+      // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
+      if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
+        SDValue Cond0 = N0->getOperand(0);
+        SDValue Cond1 = N0->getOperand(1);
+        SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+                                          N1.getValueType(), Cond1, N1, N2);
+        return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
+                           InnerSelect);
+      }
+    }
+
+    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
+    if (N1->getOpcode() == ISD::SELECT) {
+      SDValue N1_0 = N1->getOperand(0);
+      SDValue N1_1 = N1->getOperand(1);
+      SDValue N1_2 = N1->getOperand(2);
+      if (N1_2 == N2) {
+        // Create the actual "and" node if we can generate good code for it.
+        if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+          SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
+                                    N0, N1_0);
+          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
+                             N1_1, N2);
+        }
+        // Otherwise see if we can optimize the "and" to a better pattern.
+        if (SDValue Combined = visitANDLike(N0, N1_0, N))
+          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+                             N1_1, N2);
+      }
+    }
+    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
+    if (N2->getOpcode() == ISD::SELECT) {
+      SDValue N2_0 = N2->getOperand(0);
+      SDValue N2_1 = N2->getOperand(1);
+      SDValue N2_2 = N2->getOperand(2);
+      if (N2_1 == N1) {
+        // Create the actual "or" node if we can generate good code for it.
+        if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+          SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
+                                   N0, N2_0);
+          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
+                             N1, N2_2);
+        }
+        // Otherwise see if we can optimize to a better pattern.
+        if (SDValue Combined = visitORLike(N0, N2_0, N))
+          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+                             N1, N2_2);
+      }
+    }
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/ARM/movcc-double.ll b/llvm/test/CodeGen/ARM/movcc-double.ll
new file mode 100644
index 00000000000..9ce708d9bd3
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/movcc-double.ll
@@ -0,0 +1,50 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple = "arm-unknown-unknown"
+
+; select with and i1/or i1 condition should be implemented as a series of 2
+; cmovs, not by producing two conditions and using and on them.
+
+define i32 @select_and(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
+; CHECK-LABEL: select_and
+; CHECK-NOT: tst
+; CHECK-NOT: movne
+; CHECK: mov{{lo|hs}}
+; CHECK: mov{{lo|hs}}
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = icmp ult i32 %a2, %a3
+  %and = and i1 %cmp0, %cmp1
+  %res = select i1 %and, i32 %a4, i32 %a5
+  ret i32 %res
+}
+
+define i32 @select_or(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
+; select with or i1 condition should be implemented as a series of 2 cmovs, not
+; by producing two conditions and using or on them.
+; CHECK-LABEL: select_or
+; CHECK-NOT: orrs
+; CHECK-NOT: tst
+; CHECK: mov{{lo|hs}}
+; CHECK: mov{{lo|hs}}
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = icmp ult i32 %a2, %a3
+  %or = or i1 %cmp0, %cmp1
+  %res = select i1 %or, i32 %a4, i32 %a5
+  ret i32 %res
+}
+
+; If one of the conditions is materialized as a 0/1 value anyway, then the
+; sequence of 2 cmovs should not be used.
+
+@var32 = global i32 0
+define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: select_noopt
+; CHECK: orrs
+; CHECK: movne
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = icmp ult i32 %a1, %a2
+  %or = or i1 %cmp0, %cmp1
+  %zero_one = zext i1 %or to i32
+  store volatile i32 %zero_one, i32* @var32
+  %res = select i1 %or, i32 %a3, i32 %a4
+  ret i32 %res
+}
diff --git a/llvm/test/CodeGen/R600/or.ll b/llvm/test/CodeGen/R600/or.ll
index 1b1cb9a83cb..1337adb7b45 100644
--- a/llvm/test/CodeGen/R600/or.ll
+++ b/llvm/test/CodeGen/R600/or.ll
@@ -156,14 +156,14 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
 ; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
 
 ; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
-define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
+define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
   %a = load float, float addrspace(1)* %in0
   %b = load float, float addrspace(1)* %in1
   %acmp = fcmp oge float %a, 0.000000e+00
   %bcmp = fcmp oge float %b, 0.000000e+00
   %or = or i1 %acmp, %bcmp
-  %result = select i1 %or, float %a, float %b
-  store float %result, float addrspace(1)* %out
+  %result = zext i1 %or to i32
+  store i32 %result, i32 addrspace(1)* %out
   ret void
 }
 
diff --git a/llvm/test/CodeGen/X86/cmov-double.ll b/llvm/test/CodeGen/X86/cmov-double.ll
new file mode 100644
index 00000000000..994a027596c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/cmov-double.ll
@@ -0,0 +1,52 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple = "x86_64-unknown-unknown"
+
+; select with and i1/or i1 condition should be implemented as a series of 2
+; cmovs, not by producing two conditions and using and on them.
+
+define i32 @select_and(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
+; CHECK-LABEL: select_and
+; CHECK-NOT: set
+; CHECK-NOT: and[lb]
+; CHECK-NOT: test
+; CHECK: cmov
+; CHECK: cmov
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = fcmp olt float %a2, %a3
+  %and = and i1 %cmp0, %cmp1
+  %res = select i1 %and, i32 %a4, i32 %a5
+  ret i32 %res
+}
+
+define i32 @select_or(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
+; select with or i1 condition should be implemented as a series of 2 cmovs, not
+; by producing two conditions and using or on them.
+; CHECK-LABEL: select_or
+; CHECK-NOT: set
+; CHECK-NOT: or[lb]
+; CHECK-NOT: test
+; CHECK: cmov
+; CHECK: cmov
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = fcmp olt float %a2, %a3
+  %and = or i1 %cmp0, %cmp1
+  %res = select i1 %and, i32 %a4, i32 %a5
+  ret i32 %res
+}
+
+; If one of the conditions is materialized as a 0/1 value anyway, then the
+; sequence of 2 cmovs should not be used.
+
+@var32 = global i32 0
+define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: select_noopt
+; CHECK: cmov
+; CHECK-NOT: cmov
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = icmp ult i32 %a1, %a2
+  %or = or i1 %cmp0, %cmp1
+  %zero_one = zext i1 %or to i32
+  store volatile i32 %zero_one, i32* @var32
+  %res = select i1 %or, i32 %a3, i32 %a4
+  ret i32 %res
+}
diff --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll
index 440f1cc9b49..31a7af31790 100644
--- a/llvm/test/CodeGen/X86/jump_sign.ll
+++ b/llvm/test/CodeGen/X86/jump_sign.ll
@@ -217,17 +217,15 @@ entry:
 ; PR13475
 ; If we have sub a, b and cmp b, a and the result of cmp is used
 ; by sbb, we should not optimize cmp away.
-define i32 @func_q(i32 %j.4, i32 %w, i32 %el) {
+define i32 @func_q(i32 %a0, i32 %a1, i32 %a2) {
 ; CHECK-LABEL: func_q:
 ; CHECK: cmp
 ; CHECK-NEXT: sbb
-  %tmp532 = add i32 %j.4, %w
-  %tmp533 = icmp ugt i32 %tmp532, %el
-  %tmp534 = icmp ult i32 %w, %el
-  %or.cond = and i1 %tmp533, %tmp534
-  %tmp535 = sub i32 %el, %w
-  %j.5 = select i1 %or.cond, i32 %tmp535, i32 %j.4
-  ret i32 %j.5
+  %1 = icmp ult i32 %a0, %a1
+  %2 = sub i32 %a1, %a0
+  %3 = select i1 %1, i32 -1, i32 0
+  %4 = xor i32 %2, %3
+  ret i32 %4
 }
 ; rdar://11873276
 define i8* @func_r(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
diff --git a/llvm/test/CodeGen/X86/zext-sext.ll b/llvm/test/CodeGen/X86/zext-sext.ll
index 2758bff8024..01f871159d3 100644
--- a/llvm/test/CodeGen/X86/zext-sext.ll
+++ b/llvm/test/CodeGen/X86/zext-sext.ll
@@ -34,11 +34,12 @@ entry:
   %tmp12 = add i64 %tmp11, 5089792279245435153
 
 ; CHECK:      addl	$2138875574, %e[[REGISTER_zext:[a-z0-9]+]]
-; CHECK:      movslq	%e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
 ; CHECK:      cmpl	$-8608074, %e[[REGISTER_zext]]
+; CHECK:      movslq	%e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
 ; CHECK-NOT:  [[REGISTER_zext]]
-; CHECK-DAG:  testl     %e[[REGISTER_zext]]
-; CHECK:      subq	%r[[REGISTER_zext]], [[REGISTER_sext]]
+; CHECK-DAG:  cmpl	$2138875573, %e[[REGISTER_zext]]
+; CHECK:      movq  [[REGISTER_sext]], [[REGISTER_sext2:%[a-z0-9]+]]
+; CHECK:      subq	%r[[REGISTER_zext]], [[REGISTER_sext2]]
 
   %tmp13 = sub i64 %tmp12, 2138875574
   %tmp14 = zext i32 %tmp4 to i64