Diffstat (limited to 'llvm/lib')
 llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp |   1
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp                  | 155
 llvm/lib/Target/X86/X86ISelLowering.cpp                  |  31
 llvm/lib/Target/X86/X86ISelLowering.h                    |   4
 llvm/lib/Target/X86/X86Instr64bit.td                     |  32
 llvm/lib/Target/X86/X86InstrInfo.td                      |  72
 6 files changed, 281 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
index e352f88afc4..1d845a5b5e4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
@@ -558,6 +558,7 @@ void ScheduleDAGSDNodes::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
   case ISD::EntryToken:
     llvm_unreachable("EntryToken should have been excluded from the schedule!");
     break;
+  case ISD::MERGE_VALUES:
   case ISD::TokenFactor: // fall thru
     break;
   case ISD::CopyToReg: {
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 1e8429221a6..5fb496ba0b4 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -176,6 +176,7 @@ namespace {
   private:
     SDNode *Select(SDValue N);
     SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+    SDNode *SelectAtomicLoadAdd(SDNode *Node, MVT NVT);
 
     bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
     bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
@@ -1431,6 +1432,153 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
                                array_lengthof(Ops));
 }
 
+SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, MVT NVT) {
+  if (Node->hasAnyUseOfValue(0))
+    return 0;
+
+  // Optimize common patterns for __sync_add_and_fetch and
+  // __sync_sub_and_fetch where the result is not used. This allows us
+  // to use "lock" version of add, sub, inc, dec instructions.
+  // FIXME: Do not use special instructions but instead add the "lock"
+  // prefix to the target node somehow. The extra information will then be
+  // transferred to machine instruction and it denotes the prefix.
+  SDValue Chain = Node->getOperand(0);
+  SDValue Ptr = Node->getOperand(1);
+  SDValue Val = Node->getOperand(2);
+  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+  if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+    return 0;
+
+  bool isInc = false, isDec = false, isSub = false, isCN = false;
+  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
+  if (CN) {
+    isCN = true;
+    int64_t CNVal = CN->getSExtValue();
+    if (CNVal == 1)
+      isInc = true;
+    else if (CNVal == -1)
+      isDec = true;
+    else if (CNVal >= 0)
+      Val = CurDAG->getTargetConstant(CNVal, NVT);
+    else {
+      isSub = true;
+      Val = CurDAG->getTargetConstant(-CNVal, NVT);
+    }
+  } else if (Val.hasOneUse() &&
+             Val.getOpcode() == ISD::SUB &&
+             X86::isZeroNode(Val.getOperand(0))) {
+    isSub = true;
+    Val = Val.getOperand(1);
+  }
+
+  unsigned Opc = 0;
+  switch (NVT.getSimpleVT()) {
+  default: return 0;
+  case MVT::i8:
+    if (isInc)
+      Opc = X86::LOCK_INC8m;
+    else if (isDec)
+      Opc = X86::LOCK_DEC8m;
+    else if (isSub) {
+      if (isCN)
+        Opc = X86::LOCK_SUB8mi;
+      else
+        Opc = X86::LOCK_SUB8mr;
+    } else {
+      if (isCN)
+        Opc = X86::LOCK_ADD8mi;
+      else
+        Opc = X86::LOCK_ADD8mr;
+    }
+    break;
+  case MVT::i16:
+    if (isInc)
+      Opc = X86::LOCK_INC16m;
+    else if (isDec)
+      Opc = X86::LOCK_DEC16m;
+    else if (isSub) {
+      if (isCN) {
+        if (Predicate_i16immSExt8(Val.getNode()))
+          Opc = X86::LOCK_SUB16mi8;
+        else
+          Opc = X86::LOCK_SUB16mi;
+      } else
+        Opc = X86::LOCK_SUB16mr;
+    } else {
+      if (isCN) {
+        if (Predicate_i16immSExt8(Val.getNode()))
+          Opc = X86::LOCK_ADD16mi8;
+        else
+          Opc = X86::LOCK_ADD16mi;
+      } else
+        Opc = X86::LOCK_ADD16mr;
+    }
+    break;
+  case MVT::i32:
+    if (isInc)
+      Opc = X86::LOCK_INC32m;
+    else if (isDec)
+      Opc = X86::LOCK_DEC32m;
+    else if (isSub) {
+      if (isCN) {
+        if (Predicate_i32immSExt8(Val.getNode()))
+          Opc = X86::LOCK_SUB32mi8;
+        else
+          Opc = X86::LOCK_SUB32mi;
+      } else
+        Opc = X86::LOCK_SUB32mr;
+    } else {
+      if (isCN) {
+        if (Predicate_i32immSExt8(Val.getNode()))
+          Opc = X86::LOCK_ADD32mi8;
+        else
+          Opc = X86::LOCK_ADD32mi;
+      } else
+        Opc = X86::LOCK_ADD32mr;
+    }
+    break;
+  case MVT::i64:
+    if (isInc)
+      Opc = X86::LOCK_INC64m;
+    else if (isDec)
+      Opc = X86::LOCK_DEC64m;
+    else if (isSub) {
+      Opc = X86::LOCK_SUB64mr;
+      if (isCN) {
+        if (Predicate_i64immSExt8(Val.getNode()))
+          Opc = X86::LOCK_SUB64mi8;
+        else if (Predicate_i64immSExt32(Val.getNode()))
+          Opc = X86::LOCK_SUB64mi32;
+      }
+    } else {
+      Opc = X86::LOCK_ADD64mr;
+      if (isCN) {
+        if (Predicate_i64immSExt8(Val.getNode()))
+          Opc = X86::LOCK_ADD64mi8;
+        else if (Predicate_i64immSExt32(Val.getNode()))
+          Opc = X86::LOCK_ADD64mi32;
+      }
+    }
+    break;
+  }
+
+  DebugLoc dl = Node->getDebugLoc();
+  SDValue Undef = SDValue(CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF,
+                                                dl, NVT), 0);
+  SDValue MemOp = CurDAG->getMemOperand(cast<MemSDNode>(Node)->getMemOperand());
+  if (isInc || isDec) {
+    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, MemOp, Chain };
+    SDValue Ret = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 7), 0);
+    SDValue RetVals[] = { Undef, Ret };
+    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+  } else {
+    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, MemOp, Chain };
+    SDValue Ret = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 8), 0);
+    SDValue RetVals[] = { Undef, Ret };
+    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+  }
+}
+
 SDNode *X86DAGToDAGISel::Select(SDValue N) {
   SDNode *Node = N.getNode();
   MVT NVT = Node->getValueType(0);
@@ -1475,6 +1623,13 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
     case X86ISD::ATOMSWAP64_DAG:
       return SelectAtomic64(Node, X86::ATOMSWAP6432);
 
+    case ISD::ATOMIC_LOAD_ADD: {
+      SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
+      if (RetVal)
+        return RetVal;
+      break;
+    }
+
     case ISD::SMUL_LOHI:
     case ISD::UMUL_LOHI: {
       SDValue N0 = Node->getOperand(0);
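
A note on the two hunks above: SelectAtomicLoadAdd bails out (returns 0) unless value 0 of the node is unused, then classifies the addend to pick a lock inc/dec/add/sub form; because it returns a MERGE_VALUES of an IMPLICIT_DEF plus the chain, the scheduler's emitter learns to skip ISD::MERGE_VALUES exactly like ISD::TokenFactor. The classification step can be read in isolation; here is a minimal standalone C++ sketch (plain C++, no LLVM APIs; helper names are illustrative, not part of the patch):

#include <cstdint>
#include <cstdio>

// Mirrors the constant-addend classification in SelectAtomicLoadAdd: +1
// selects lock inc, -1 selects lock dec, any other negative constant is
// negated and becomes lock sub with a positive immediate, and the rest
// stay lock add.
enum AtomicOpKind { OpInc, OpDec, OpAdd, OpSub };

struct Classified {
  AtomicOpKind Kind;
  int64_t Imm; // non-negative after classification (unused for inc/dec)
};

static Classified classifyAddend(int64_t CNVal) {
  if (CNVal == 1)
    return {OpInc, 0};
  if (CNVal == -1)
    return {OpDec, 0};
  if (CNVal < 0)
    return {OpSub, -CNVal}; // emit lock sub with |CNVal|
  return {OpAdd, CNVal};
}

int main() {
  static const char *const Names[] = {"inc", "dec", "add", "sub"};
  const int64_t Tests[] = {1, -1, 16, -16};
  for (int64_t V : Tests) {
    Classified C = classifyAddend(V);
    std::printf("%+lld -> lock %s (imm %lld)\n",
                (long long)V, Names[C.Kind], (long long)C.Imm);
  }
  return 0;
}

The non-constant branch of the patch handles the analogous case symbolically: a one-use (0 - x) operand is rewritten into a lock sub of x, using X86::isZeroNode to recognize the zero.
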
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c8eed721eb2..c4ed89e384b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2746,6 +2746,15 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
   return Mask;
 }
 
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+bool X86::isZeroNode(SDValue Elt) {
+  return ((isa<ConstantSDNode>(Elt) &&
+           cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
+          (isa<ConstantFPSDNode>(Elt) &&
+           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+}
+
 /// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
 /// their permute mask.
 static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
@@ -2852,15 +2861,6 @@ static bool isSplatVector(SDNode *N) {
   return true;
 }
 
-/// isZeroNode - Returns true if Elt is a constant zero or a floating point
-/// constant +0.0.
-static inline bool isZeroNode(SDValue Elt) {
-  return ((isa<ConstantSDNode>(Elt) &&
-           cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
-          (isa<ConstantFPSDNode>(Elt) &&
-           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
-}
-
 /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
 /// to an zero vector.
 /// FIXME: move to dag combiner / method on ShuffleVectorSDNode
@@ -2874,13 +2874,15 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
       unsigned Opc = V2.getOpcode();
       if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
         continue;
-      if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems)))
+      if (Opc != ISD::BUILD_VECTOR ||
+          !X86::isZeroNode(V2.getOperand(Idx-NumElems)))
        return false;
     } else if (Idx >= 0) {
       unsigned Opc = V1.getOpcode();
       if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
         continue;
-      if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx)))
+      if (Opc != ISD::BUILD_VECTOR ||
+          !X86::isZeroNode(V1.getOperand(Idx)))
         return false;
     }
   }
@@ -3048,7 +3050,7 @@ unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems,
       continue;
     }
     SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index);
-    if (Elt.getNode() && isZeroNode(Elt))
+    if (Elt.getNode() && X86::isZeroNode(Elt))
       ++NumZeros;
     else
       break;
@@ -3221,7 +3223,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
     if (Elt.getOpcode() != ISD::Constant &&
         Elt.getOpcode() != ISD::ConstantFP)
       IsAllConstants = false;
-    if (isZeroNode(Elt))
+    if (X86::isZeroNode(Elt))
      NumZero++;
     else {
       NonZeros |= (1 << i);
@@ -3298,7 +3300,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
 
     // Is it a vector logical left shift?
     if (NumElems == 2 && Idx == 1 &&
-        isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) {
+        X86::isZeroNode(Op.getOperand(0)) &&
+        !X86::isZeroNode(Op.getOperand(1))) {
       unsigned NumBits = VT.getSizeInBits();
       return getVShift(true, VT,
                        DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index c3da894f0e4..579b42fab27 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -336,6 +336,10 @@ namespace llvm {
     /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
     /// instructions.
     unsigned getShufflePSHUFLWImmediate(SDNode *N);
+
+    /// isZeroNode - Returns true if Elt is a constant zero or a floating point
+    /// constant +0.0.
+    bool isZeroNode(SDValue Elt);
   }
 
   //===--------------------------------------------------------------------===//
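
The X86ISelLowering changes only move isZeroNode from a file-local static helper into the X86 namespace (declared in X86ISelLowering.h) so that X86ISelDAGToDAG.cpp can call it; the body is unchanged. Its accept-+0.0-but-not--0.0 contract matters because -0.0 has a nonzero bit pattern and so cannot come from a zeroing idiom. A small self-contained illustration with plain doubles standing in for SDValues (an assumption: this mirrors what APFloat::isPosZero checks, namely the sign bit):

#include <cmath>
#include <cstdio>

// +0.0 qualifies as a "zero node"; -0.0 does not, because its sign bit
// is set even though it compares equal to zero.
static bool isPosZero(double D) {
  return D == 0.0 && !std::signbit(D);
}

int main() {
  std::printf("+0.0 qualifies: %d\n", isPosZero(0.0));  // prints 1
  std::printf("-0.0 qualifies: %d\n", isPosZero(-0.0)); // prints 0
  return 0;
}
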
diff --git a/llvm/lib/Target/X86/X86Instr64bit.td b/llvm/lib/Target/X86/X86Instr64bit.td
index 1dd7e07964e..427f6db51c8 100644
--- a/llvm/lib/Target/X86/X86Instr64bit.td
+++ b/llvm/lib/Target/X86/X86Instr64bit.td
@@ -1380,11 +1380,43 @@ def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
                "xadd\t$val, $ptr",
                [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
                 TB, LOCK;
+
 def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val),
                   "xchg\t$val, $ptr",
                   [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>;
 }
 
+// Optimized codegen when the non-memory output is not used.
+// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
+def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+                      "lock\n\t"
+                      "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
+                                      (ins i64mem:$dst, i64i8imm :$src2),
+                    "lock\n\t"
+                    "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs),
+                                        (ins i64mem:$dst, i64i32imm :$src2),
+                      "lock\n\t"
+                      "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+                      "lock\n\t"
+                      "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs),
+                                      (ins i64mem:$dst, i64i8imm :$src2),
+                      "lock\n\t"
+                      "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs),
+                                        (ins i64mem:$dst, i64i32imm:$src2),
+                      "lock\n\t"
+                      "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
+                     "lock\n\t"
+                     "inc{q}\t$dst", []>, LOCK;
+def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
+                      "lock\n\t"
+                      "dec{q}\t$dst", []>, LOCK;
+
 // Atomic exchange, and, or, xor
 let Constraints = "$val = $dst", Defs = [EFLAGS],
                   usesCustomDAGSchedInserter = 1 in {
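
On the mi8/mi32 split above: x86-64 encodes sign-extended 8-bit (opcode 0x83) and sign-extended 32-bit (opcode 0x81) immediate forms of add/sub, but no 64-bit immediate form, which is what the Predicate_i64immSExt8 / Predicate_i64immSExt32 checks in SelectAtomicLoadAdd decide between. A hedged sketch of that choice (the instruction names come from this patch; the helper itself is illustrative, not LLVM code):

#include <cstdint>
#include <cstdio>

// Prefer the shortest encoding that can represent the immediate; anything
// wider than 32 bits signed has to live in a register first.
static const char *pickLockAdd64Form(int64_t Imm) {
  if (Imm >= INT8_MIN && Imm <= INT8_MAX)
    return "LOCK_ADD64mi8";  // imm8, sign-extended to 64 bits (0x83 /0)
  if (Imm >= INT32_MIN && Imm <= INT32_MAX)
    return "LOCK_ADD64mi32"; // imm32, sign-extended to 64 bits (0x81 /0)
  return "LOCK_ADD64mr";     // register form (the selector's i64 default)
}

int main() {
  std::printf("%s\n", pickLockAdd64Form(8));         // LOCK_ADD64mi8
  std::printf("%s\n", pickLockAdd64Form(1 << 20));   // LOCK_ADD64mi32
  std::printf("%s\n", pickLockAdd64Form(1LL << 40)); // LOCK_ADD64mr
  return 0;
}
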
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index c547c76ec14..49da970ca24 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -3255,6 +3255,78 @@ def LXADD8  : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val),
                 TB, LOCK;
 }
 
+// Optimized codegen when the non-memory output is not used.
+// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
+def LOCK_ADD8mr  : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
+                    "lock\n\t"
+                    "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mr  : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+                    "lock\n\t"
+                    "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_ADD32mr  : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+                    "lock\n\t"
+                    "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD8mi   : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
+                    "lock\n\t"
+                    "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mi  : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
+                    "lock\n\t"
+                    "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD32mi  : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
+                    "lock\n\t"
+                    "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+                    "lock\n\t"
+                    "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+                    "lock\n\t"
+                    "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_INC8m  : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
+                    "lock\n\t"
+                    "inc{b}\t$dst", []>, LOCK;
+def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
+                    "lock\n\t"
+                    "inc{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
+                    "lock\n\t"
+                    "inc{l}\t$dst", []>, LOCK;
+
+def LOCK_SUB8mr   : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
+                    "lock\n\t"
+                    "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mr  : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
+                    "lock\n\t"
+                    "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mr  : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
+                    "lock\n\t"
+                    "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB8mi   : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
+                    "lock\n\t"
+                    "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mi  : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
+                    "lock\n\t"
+                    "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mi  : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
+                    "lock\n\t"
+                    "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
+                    "lock\n\t"
+                    "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK;
+def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+                    "lock\n\t"
+                    "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
+
+def LOCK_DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
+                    "lock\n\t"
+                    "dec{b}\t$dst", []>, LOCK;
+def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
+                    "lock\n\t"
+                    "dec{w}\t$dst", []>, OpSize, LOCK;
+def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
+                    "lock\n\t"
+                    "dec{l}\t$dst", []>, LOCK;
+
 // Atomic exchange, and, or, xor
 let Constraints = "$val = $dst", Defs = [EFLAGS],
                   usesCustomDAGSchedInserter = 1 in {
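
Taken together, the patch targets source patterns like the following sketch, assuming the GCC-style __sync builtins that the patch's own comment cites (exact assembly is compiler- and flag-dependent, so the comments name candidates, not guaranteed output):

#include <cstdint>

volatile int64_t Counter; // hypothetical shared counter, for illustration only

void bump() {
  // Result unused: value 0 of the ATOMIC_LOAD_ADD node has no uses, so the
  // selector can pick a memory-destination form such as lock incq.
  __sync_add_and_fetch(&Counter, 1);
}

void drop(int64_t N) {
  // Result unused with a non-constant amount: candidate for lock subq.
  __sync_sub_and_fetch(&Counter, N);
}

int64_t bumpAndRead() {
  // Result used: SelectAtomicLoadAdd returns 0 and selection falls back to
  // the lock xadd (LXADD64) pattern, which yields the value in a register.
  return __sync_add_and_fetch(&Counter, 1);
}

int main() {
  bump();
  drop(16);
  return (int)bumpAndRead();
}
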

