diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 36 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 20 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/clz.ll | 12 | 
3 files changed, 53 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1d64b1d859c..c2f2736b88b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5352,15 +5352,26 @@ SDOperand X86TargetLowering::LowerCTLZ(SDOperand Op, SelectionDAG &DAG) {    Op = Op.getOperand(0);    if (VT == MVT::i8) { +    // Zero extend to i32 since there is not an i8 bsr.      OpVT = MVT::i32;      Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);    } -  if (VT == MVT::i32 || VT == MVT::i64) -    return DAG.getNode(ISD::XOR, OpVT, DAG.getNode(X86ISD::BSR, OpVT, Op), -                       DAG.getConstant(NumBits-1, OpVT)); -  Op = DAG.getNode(ISD::SUB, OpVT, DAG.getConstant(NumBits-1, OpVT), -                   DAG.getNode(X86ISD::BSR, OpVT, Op)); +  // Issue a bsr (scan bits in reverse) which also sets EFLAGS. +  SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); +  Op = DAG.getNode(X86ISD::BSR, VTs, Op); + +  // If src is zero (i.e. bsr sets ZF), returns NumBits. +  SmallVector<SDOperand, 4> Ops; +  Ops.push_back(Op); +  Ops.push_back(DAG.getConstant(NumBits+NumBits-1, OpVT)); +  Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8)); +  Ops.push_back(Op.getValue(1)); +  Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4); + +  // Finally xor with NumBits-1. +  Op = DAG.getNode(ISD::XOR, OpVT, Op, DAG.getConstant(NumBits-1, OpVT)); +    if (VT == MVT::i8)      Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);    return Op; @@ -5369,13 +5380,26 @@ SDOperand X86TargetLowering::LowerCTLZ(SDOperand Op, SelectionDAG &DAG) {  SDOperand X86TargetLowering::LowerCTTZ(SDOperand Op, SelectionDAG &DAG) {    MVT::ValueType VT = Op.getValueType();    MVT::ValueType OpVT = VT; +  unsigned NumBits = MVT::getSizeInBits(VT);    Op = Op.getOperand(0);    if (VT == MVT::i8) {      OpVT = MVT::i32;      Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);    } -  Op = DAG.getNode(X86ISD::BSF, OpVT, Op); + +  // Issue a bsf (scan bits forward) which also sets EFLAGS. +  SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); +  Op = DAG.getNode(X86ISD::BSF, VTs, Op); + +  // If src is zero (i.e. bsf sets ZF), returns NumBits. +  SmallVector<SDOperand, 4> Ops; +  Ops.push_back(Op); +  Ops.push_back(DAG.getConstant(NumBits, OpVT)); +  Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8)); +  Ops.push_back(Op.getValue(1)); +  Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4); +    if (VT == MVT::i8)      Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);    return Op; diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 28ef8388a90..a0d72b21aee 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -451,29 +451,33 @@ def XCHG32rm : I<0x87, MRMSrcMem,  let Defs = [EFLAGS] in {  def BSF16rr  : I<0xBC, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),                   "bsf{w}\t{$src, $dst||$dst, $src}", -                 [(set GR16:$dst, (X86bsf GR16:$src))]>, TB; +                 [(set GR16:$dst, (X86bsf GR16:$src)), (implicit EFLAGS)]>, TB;  def BSF16rm  : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),                   "bsf{w}\t{$src, $dst||$dst, $src}", -                 [(set GR16:$dst, (X86bsf (loadi16 addr:$src)))]>, TB; +                 [(set GR16:$dst, (X86bsf (loadi16 addr:$src))), +                  (implicit EFLAGS)]>, TB;  def BSF32rr  : I<0xBC, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),                   "bsf{l}\t{$src, $dst||$dst, $src}", -                 [(set GR32:$dst, (X86bsf GR32:$src))]>, TB; +                 [(set GR32:$dst, (X86bsf GR32:$src)), (implicit EFLAGS)]>, TB;  def BSF32rm  : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),                   "bsf{l}\t{$src, $dst||$dst, $src}", -                 [(set GR32:$dst, (X86bsf (loadi32 addr:$src)))]>, TB; +                 [(set GR32:$dst, (X86bsf (loadi32 addr:$src))), +                  (implicit EFLAGS)]>, TB;  def BSR16rr  : I<0xBD, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),                   "bsr{w}\t{$src, $dst||$dst, $src}", -                 [(set GR16:$dst, (X86bsr GR16:$src))]>, TB; +                 [(set GR16:$dst, (X86bsr GR16:$src)), (implicit EFLAGS)]>, TB;  def BSR16rm  : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),                   "bsr{w}\t{$src, $dst||$dst, $src}", -                 [(set GR16:$dst, (X86bsr (loadi16 addr:$src)))]>, TB; +                 [(set GR16:$dst, (X86bsr (loadi16 addr:$src))), +                  (implicit EFLAGS)]>, TB;  def BSR32rr  : I<0xBD, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),                   "bsr{l}\t{$src, $dst||$dst, $src}", -                 [(set GR32:$dst, (X86bsr GR32:$src))]>, TB; +                 [(set GR32:$dst, (X86bsr GR32:$src)), (implicit EFLAGS)]>, TB;  def BSR32rm  : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),                   "bsr{l}\t{$src, $dst||$dst, $src}", -                 [(set GR32:$dst, (X86bsr (loadi32 addr:$src)))]>, TB; +                 [(set GR32:$dst, (X86bsr (loadi32 addr:$src))), +                  (implicit EFLAGS)]>, TB;  } // Defs = [EFLAGS]  def LEA16r   : I<0x8D, MRMSrcMem, diff --git a/llvm/test/CodeGen/X86/clz.ll b/llvm/test/CodeGen/X86/clz.ll index 0505529cc48..c3b3b412f2a 100644 --- a/llvm/test/CodeGen/X86/clz.ll +++ b/llvm/test/CodeGen/X86/clz.ll @@ -1,5 +1,6 @@ -; RUN: llvm-as < %s | llc -march=x86 | grep bsr +; RUN: llvm-as < %s | llc -march=x86 | grep bsr | count 2  ; RUN: llvm-as < %s | llc -march=x86 | grep bsf +; RUN: llvm-as < %s | llc -march=x86 | grep cmov | count 3  define i32 @t1(i32 %x) nounwind  {  	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x ) @@ -14,3 +15,12 @@ define i32 @t2(i32 %x) nounwind  {  }  declare i32 @llvm.cttz.i32(i32) nounwind readnone  + +define i16 @t3(i16 %x, i16 %y) nounwind  { +entry: +        %tmp1 = add i16 %x, %y +	%tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1 )		; <i16> [#uses=1] +	ret i16 %tmp2 +} + +declare i16 @llvm.ctlz.i16(i16) nounwind readnone   | 

