 llvm/include/llvm/Target/TargetLowering.h        |   2
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp    |  50
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp |  43
 llvm/lib/Target/X86/X86ISelLowering.cpp          |  70
 llvm/test/CodeGen/X86/bt.ll                      | 420
 llvm/test/CodeGen/X86/commute-cmov.ll            |  17
 6 files changed, 562 insertions(+), 40 deletions(-)
diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h
index e6f18ea43f8..4ec7d3f6279 100644
--- a/llvm/include/llvm/Target/TargetLowering.h
+++ b/llvm/include/llvm/Target/TargetLowering.h
@@ -780,6 +780,8 @@ public:
     SDValue CombineTo(SDNode *N, const std::vector<SDValue> &To);
     SDValue CombineTo(SDNode *N, SDValue Res);
     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1);
+
+    void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
   };
 
   /// SimplifySetCC - Try to simplify a setcc built with the specified operands
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 48e556b2362..848051940f7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -102,6 +102,8 @@ namespace {
       SDValue To[] = { Res0, Res1 };
       return CombineTo(N, To, 2, AddTo);
     }
+
+    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
 
   private:
 
@@ -298,6 +300,10 @@ CombineTo(SDNode *N, SDValue Res0, SDValue Res1) {
   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1);
 }
 
+void TargetLowering::DAGCombinerInfo::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
+}
 
 //===----------------------------------------------------------------------===//
 // Helper Functions
@@ -539,29 +545,14 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
   return SDValue(N, 0);
 }
 
-/// SimplifyDemandedBits - Check the specified integer node value to see if
-/// it can be simplified or if things it uses can be simplified by bit
-/// propagation.  If so, return true.
-bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
-  TargetLowering::TargetLoweringOpt TLO(DAG);
-  APInt KnownZero, KnownOne;
-  if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
-    return false;
-
-  // Revisit the node.
-  AddToWorkList(Op.getNode());
-
-  // Replace the old value with the new one.
-  ++NodesCombined;
-  DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.getNode()->dump(&DAG));
-  DOUT << "\nWith: "; DEBUG(TLO.New.getNode()->dump(&DAG));
-  DOUT << '\n';
-
+void
+DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &
+                                                                          TLO) {
   // Replace all uses.  If any nodes become isomorphic to other nodes and
   // are deleted, make sure to remove them from our worklist.
   WorkListRemover DeadNodes(*this);
   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
-
+
   // Push the new node and any (possibly new) users onto the worklist.
   AddToWorkList(TLO.New.getNode());
   AddUsersToWorkList(TLO.New.getNode());
@@ -580,6 +571,27 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
     DAG.DeleteNode(TLO.Old.getNode());
   }
+}
+
+/// SimplifyDemandedBits - Check the specified integer node value to see if
+/// it can be simplified or if things it uses can be simplified by bit
+/// propagation.  If so, return true.
+bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
+  TargetLowering::TargetLoweringOpt TLO(DAG);
+  APInt KnownZero, KnownOne;
+  if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
+    return false;
+
+  // Revisit the node.
+  AddToWorkList(Op.getNode());
+
+  // Replace the old value with the new one.
+  ++NodesCombined;
+  DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.getNode()->dump(&DAG));
+  DOUT << "\nWith: "; DEBUG(TLO.New.getNode()->dump(&DAG));
+  DOUT << '\n';
+
+  CommitTargetLoweringOpt(TLO);
   return true;
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 7245244684d..e479e05bdd7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -724,7 +724,7 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
 bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
                                                         const APInt &Demanded) {
   // FIXME: ISD::SELECT, ISD::SELECT_CC
-  switch(Op.getOpcode()) {
+  switch (Op.getOpcode()) {
   default: break;
   case ISD::AND:
   case ISD::OR:
@@ -1054,6 +1054,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     }
     break;
   case ISD::SRA:
+    // If this is an arithmetic shift right and only the low bit is demanded,
+    // we can always convert this into a logical shr, even if the shift amount
+    // is variable.  The low bit of the shift cannot be an input sign bit unless
+    // the shift amount is >= the size of the datatype, which is undefined.
+    if (DemandedMask == 1)
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, Op.getValueType(),
+                                               Op.getOperand(0), Op.getOperand(1)));
+
     if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
       MVT VT = Op.getValueType();
       unsigned ShAmt = SA->getZExtValue();
@@ -1332,6 +1340,21 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
   return 1;
 }
 
+static bool ValueHasAtMostOneBitSet(SDValue Val, const SelectionDAG &DAG) {
+  // Logical shift right or left won't ever introduce new set bits.
+  // We check for this case because we don't care which bits are
+  // set, but ComputeMaskedBits won't know anything unless it can
+  // determine which specific bits may be set.
+  if (Val.getOpcode() == ISD::SHL || Val.getOpcode() == ISD::SRL)
+    return ValueHasAtMostOneBitSet(Val.getOperand(0), DAG);
+
+  MVT OpVT = Val.getValueType();
+  unsigned BitWidth = OpVT.getSizeInBits();
+  APInt Mask = APInt::getAllOnesValue(BitWidth);
+  APInt KnownZero, KnownOne;
+  DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
+  return KnownZero.countPopulation() == BitWidth - 1;
+}
 
 /// SimplifySetCC - Try to simplify a setcc built with the specified operands
 /// and cc. If it is unable to simplify it, return a null SDValue.
@@ -1791,6 +1814,24 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1,
         }
       }
     }
+
+    // Simplify x&y == y to x&y != 0 if y has exactly one bit set.
+    if (N0.getOpcode() == ISD::AND)
+      if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
+        if (ValueHasAtMostOneBitSet(N1, DAG)) {
+          Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+          SDValue Zero = DAG.getConstant(0, N1.getValueType());
+          return DAG.getSetCC(VT, N0, Zero, Cond);
+        }
+      }
+    if (N1.getOpcode() == ISD::AND)
+      if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
+        if (ValueHasAtMostOneBitSet(N0, DAG)) {
+          Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+          SDValue Zero = DAG.getConstant(0, N0.getValueType());
+          return DAG.getSetCC(VT, N1, Zero, Cond);
+        }
+      }
   }
 
   // Fold away ALL boolean setcc's.
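Editor's note: the two folds this file gains can be checked by brute force. Below is a minimal standalone C++ sanity check, illustrative only and not part of the patch (sample values and loop bounds are arbitrary). The second loop uses y with exactly one bit set, the case the SimplifySetCC comment describes:

    // Not part of the patch: brute-force check of the two equivalences above.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t samples[] = { 0u, 1u, 0x80000000u, 0xdeadbeefu };
      for (uint32_t x : samples) {
        for (unsigned n = 0; n < 32; ++n) {
          // sra and srl agree on bit 0 for in-range shift amounts; the sign
          // bit can only reach bit 0 when n >= 32, which is undefined anyway.
          assert(((int32_t(x) >> n) & 1) == int32_t((x >> n) & 1));
        }
        for (unsigned k = 0; k < 32; ++k) {
          // When y has exactly one bit set, x&y == y  <=>  x&y != 0,
          // which is what the SimplifySetCC change exploits.
          uint32_t y = 1u << k;
          assert(((x & y) == y) == ((x & y) != 0));
        }
      }
      return 0;
    }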
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6ec97e2db60..bf7c704b9ba 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5114,22 +5114,39 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
   SDValue Op1 = Op.getOperand(1);
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
 
-  // Lower (X & (1 << N)) == 0 to BT.
-  // Lower ((X >>u N) & 1) != 0 to BT.
-  // Lower ((X >>s N) & 1) != 0 to BT.
+  // Lower (X & (1 << N)) == 0 to BT(X, N).
+  // Lower ((X >>u N) & 1) != 0 to BT(X, N).
+  // Lower ((X >>s N) & 1) != 0 to BT(X, N).
   if (Op0.getOpcode() == ISD::AND &&
       Op0.hasOneUse() &&
       Op1.getOpcode() == ISD::Constant &&
-      Op0.getOperand(1).getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(Op1)->getZExtValue() == 0 &&
       (CC == ISD::SETEQ || CC == ISD::SETNE)) {
-    ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));
-    ConstantSDNode *CmpRHS = cast<ConstantSDNode>(Op1);
-    SDValue AndLHS = Op0.getOperand(0);
-    if (CmpRHS->getZExtValue() == 0 && AndRHS->getZExtValue() == 1 &&
-        AndLHS.getOpcode() == ISD::SRL) {
-      SDValue LHS = AndLHS.getOperand(0);
-      SDValue RHS = AndLHS.getOperand(1);
+    SDValue LHS, RHS;
+    if (Op0.getOperand(1).getOpcode() == ISD::SHL) {
+      if (ConstantSDNode *Op010C =
+            dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))
+        if (Op010C->getZExtValue() == 1) {
+          LHS = Op0.getOperand(0);
+          RHS = Op0.getOperand(1).getOperand(1);
+        }
+    } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {
+      if (ConstantSDNode *Op000C =
+            dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))
+        if (Op000C->getZExtValue() == 1) {
+          LHS = Op0.getOperand(1);
+          RHS = Op0.getOperand(0).getOperand(1);
+        }
+    } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {
+      ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));
+      SDValue AndLHS = Op0.getOperand(0);
+      if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
+        LHS = AndLHS.getOperand(0);
+        RHS = AndLHS.getOperand(1);
+      }
+    }
+    if (LHS.getNode()) {
       // If LHS is i8, promote it to i16 with any_extend.  There is no i8 BT
       // instruction.  Since the shift amount is in-range-or-undefined, we know
       // that doing a bittest on the i16 value is ok.  We extend to i32 because
@@ -5141,10 +5158,10 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
       // BT ignores high bits (like shifts) we can use anyextend.
       if (LHS.getValueType() != RHS.getValueType())
         RHS = DAG.getNode(ISD::ANY_EXTEND, LHS.getValueType(), RHS);
-
+
       SDValue BT = DAG.getNode(X86ISD::BT, MVT::i32, LHS, RHS);
       unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
-      return DAG.getNode(X86ISD::SETCC, MVT::i8,
+      return DAG.getNode(X86ISD::SETCC, MVT::i8,
                          DAG.getConstant(Cond, MVT::i8), BT);
     }
   }
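Editor's note: the LowerSETCC hunks above recognize three equivalent DAG shapes for a single-bit test. In source terms they correspond to patterns like the following sketch (illustrative C++ only, not from the patch; with this lowering each function should compile to a btl on x86):

    #include <cstdint>

    // Assumes n < 32 in all three; larger shifts are undefined, matching the
    // "in-range-or-undefined" note in the code above.

    // (X & (1 << N)) != 0  -->  BT(X, N)
    bool bit_via_mask(uint32_t x, uint32_t n) { return (x & (1u << n)) != 0; }

    // ((X >>u N) & 1) != 0  -->  BT(X, N)
    bool bit_via_lshr(uint32_t x, uint32_t n) { return ((x >> n) & 1) != 0; }

    // ((X >>s N) & 1) != 0  -->  BT(X, N); the arithmetic-shift form works
    // because SimplifyDemandedBits now rewrites SRA to SRL when only bit 0
    // is demanded.
    bool bit_via_ashr(int32_t x, uint32_t n) { return ((x >> n) & 1) != 0; }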
@@ -5295,7 +5312,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
         !isScalarFPTypeInSSEReg(VT))  // FPStack?
       IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
 
-    if (isX86LogicalCmp(Opc) && !IllegalFPCMov) {
+    if ((isX86LogicalCmp(Opc) && !IllegalFPCMov) || Opc == X86ISD::BT) { // FIXME
       Cond = Cmp;
       addTest = false;
     }
@@ -7547,6 +7564,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
 
 /// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd.
 static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
+                                         TargetLowering::DAGCombinerInfo &DCI,
                                          const X86Subtarget *Subtarget,
                                          const TargetLowering &TLI) {
   unsigned NumOps = N->getNumOperands();
@@ -7587,7 +7605,9 @@ static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
   SDVTList Tys = DAG.getVTList(VT, MVT::Other);
   SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
   SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, Tys, Ops, 2);
-  DAG.ReplaceAllUsesOfValueWith(SDValue(Base, 1), ResNode.getValue(1));
+  TargetLowering::TargetLoweringOpt TLO(DAG);
+  TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
+  DCI.CommitTargetLoweringOpt(TLO);
   return ResNode;
 }
@@ -7875,6 +7895,23 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
 
+static SDValue PerformBTCombine(SDNode *N,
+                                SelectionDAG &DAG,
+                                TargetLowering::DAGCombinerInfo &DCI) {
+  // BT ignores high bits in the bit index operand.
+  SDValue Op1 = N->getOperand(1);
+  if (Op1.hasOneUse()) {
+    unsigned BitWidth = Op1.getValueSizeInBits();
+    APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
+    APInt KnownZero, KnownOne;
+    TargetLowering::TargetLoweringOpt TLO(DAG);
+    TargetLowering &TLI = DAG.getTargetLoweringInfo();
+    if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) ||
+        TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO))
+      DCI.CommitTargetLoweringOpt(TLO);
+  }
+  return SDValue();
+}
 
 SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
@@ -7883,7 +7920,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   default: break;
   case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
   case ISD::BUILD_VECTOR:
-    return PerformBuildVectorCombine(N, DAG, Subtarget, *this);
+    return PerformBuildVectorCombine(N, DAG, DCI, Subtarget, *this);
   case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
   case ISD::SHL:
   case ISD::SRA:
@@ -7892,6 +7929,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::FXOR:
   case X86ISD::FOR:         return PerformFORCombine(N, DAG);
   case X86ISD::FAND:        return PerformFANDCombine(N, DAG);
+  case X86ISD::BT:          return PerformBTCombine(N, DAG, DCI);
   }
 
   return SDValue();
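Editor's note: PerformBTCombine is sound because the register form of x86 bt takes the bit offset modulo the operand width, so for a 32-bit operand only the low Log2_32(32) = 5 bits of the index are observable; everything above them is not demanded. A scalar model of that semantics (illustrative only, not from the patch):

    #include <cstdint>

    // Models x86 `bt r32, r32`: the bit offset is taken modulo 32, so only
    // the low 5 bits of idx affect the result. Any computation feeding idx
    // therefore only needs its low 5 bits to be correct.
    bool bt32(uint32_t x, uint32_t idx) {
      return ((x >> (idx & 31)) & 1) != 0;  // idx & 31 == idx % 32
    }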
diff --git a/llvm/test/CodeGen/X86/bt.ll b/llvm/test/CodeGen/X86/bt.ll
index 86254d3295b..f91130dd69a 100644
--- a/llvm/test/CodeGen/X86/bt.ll
+++ b/llvm/test/CodeGen/X86/bt.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep btl
+; RUN: llvm-as < %s | llc -march=x86 | grep btl | count 28
 ; RUN: llvm-as < %s | llc -mcpu=pentium4 | grep btl | not grep esp
 ; RUN: llvm-as < %s | llc -mcpu=penryn   | grep btl | not grep esp
 ; PR3253
@@ -7,8 +7,17 @@
 ; pentium4, however it is currently disabled due to the register+memory
 ; form having different semantics than the register+register form.
 
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin8"
+; Test these patterns:
+;    (X & (1 << N))  != 0  -->  BT(X, N).
+;    ((X >>u N) & 1) != 0  -->  BT(X, N).
+; as well as several variations:
+;    - The second form can use an arithmetic shift.
+;    - Either form can use == instead of !=.
+;    - Either form can compare with an operand of the &
+;      instead of with 0.
+;    - The comparison can be commuted (only cases where neither
+;      operand is constant are included).
+;    - The and can be commuted.
 
 define void @test2(i32 %x, i32 %n) nounwind {
 entry:
@@ -25,4 +34,409 @@ UnifiedReturnBlock:		; preds = %entry
 	ret void
 }
 
+define void @test2b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 1, %tmp29
+	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @atest2(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
+	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @atest2b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 1, %tmp29
+	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @test3(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
+	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @test3b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %x, %tmp29
+	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @testne2(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
+	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @testne2b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 1, %tmp29
+	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @atestne2(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
+	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @atestne2b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 1, %tmp29
+	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @testne3(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
+	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @testne3b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %x, %tmp29
+	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @query2(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
+	%tmp4 = icmp eq i32 %tmp3, 1		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @query2b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 1, %tmp29
+	%tmp4 = icmp eq i32 %tmp3, 1		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @aquery2(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
+	%tmp4 = icmp eq i32 %tmp3, 1		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @aquery2b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 1, %tmp29
+	%tmp4 = icmp eq i32 %tmp3, 1		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @query3(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
+	%tmp4 = icmp eq i32 %tmp3, %tmp29		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @query3b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %x, %tmp29
+	%tmp4 = icmp eq i32 %tmp3, %tmp29		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @query3x(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
+	%tmp4 = icmp eq i32 %tmp29, %tmp3		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @query3bx(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %x, %tmp29
+	%tmp4 = icmp eq i32 %tmp29, %tmp3		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @queryne2(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
+	%tmp4 = icmp ne i32 %tmp3, 1		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @queryne2b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 1, %tmp29
+	%tmp4 = icmp ne i32 %tmp3, 1		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @aqueryne2(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
+	%tmp4 = icmp ne i32 %tmp3, 1		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @aqueryne2b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 1, %tmp29
+	%tmp4 = icmp ne i32 %tmp3, 1		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @queryne3(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
+	%tmp4 = icmp ne i32 %tmp3, %tmp29		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @queryne3b(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %x, %tmp29
+	%tmp4 = icmp ne i32 %tmp3, %tmp29		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @queryne3x(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
+	%tmp4 = icmp ne i32 %tmp29, %tmp3		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
+
+define void @queryne3bx(i32 %x, i32 %n) nounwind {
+entry:
+	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
+	%tmp3 = and i32 %x, %tmp29
+	%tmp4 = icmp ne i32 %tmp29, %tmp3		; <i1> [#uses=1]
+	br i1 %tmp4, label %bb, label %UnifiedReturnBlock
+
+bb:		; preds = %entry
+	call void @foo()
+	ret void
+
+UnifiedReturnBlock:		; preds = %entry
+	ret void
+}
 
 declare void @foo()
diff --git a/llvm/test/CodeGen/X86/commute-cmov.ll b/llvm/test/CodeGen/X86/commute-cmov.ll
index 24398dc1257..ac0e4ef3e57 100644
--- a/llvm/test/CodeGen/X86/commute-cmov.ll
+++ b/llvm/test/CodeGen/X86/commute-cmov.ll
@@ -1,5 +1,20 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep {cmove	16(%esp)}
+; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: grep btl %t | count 2
+; RUN: grep cmov %t | count 2
+; RUN: not grep test %t
+; RUN: not grep set %t
+; RUN: not grep j %t
+; RUN: not grep cmovne %t
+; RUN: not grep cmove %t
+define i32 @foo(i32 %x, i32 %n, i32 %w, i32 %v) nounwind readnone {
+entry:
+	%0 = lshr i32 %x, %n		; <i32> [#uses=1]
+	%1 = and i32 %0, 1		; <i32> [#uses=1]
+	%toBool = icmp eq i32 %1, 0		; <i1> [#uses=1]
+	%.0 = select i1 %toBool, i32 %v, i32 12		; <i32> [#uses=1]
+	ret i32 %.0
+}
 
 define i32 @bar(i32 %x, i32 %n, i32 %w, i32 %v) nounwind readnone {
 entry:
 	%0 = lshr i32 %x, %n		; <i32> [#uses=1]
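Editor's note: the new @foo test pairs the bit test with a select, checking that it lowers to btl feeding a cmov rather than a shift/test/branch sequence. A rough source-level equivalent in C++ (illustrative only, not part of the patch):

    #include <cstdint>

    // Rough equivalent of @foo in commute-cmov.ll; w is unused, matching the
    // test's signature. Bit n of x clear -> v, bit set -> 12.
    int32_t foo(uint32_t x, uint32_t n, int32_t w, int32_t v) {
      return ((x >> n) & 1) == 0 ? v : 12;
    }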

