Diffstat (limited to 'llvm/lib')
 -rw-r--r--   llvm/lib/Target/R600/SIISelLowering.cpp   109
 -rw-r--r--   llvm/lib/Target/R600/SIISelLowering.h        3
 -rw-r--r--   llvm/lib/Target/R600/SIInstrInfo.cpp        22
 -rw-r--r--   llvm/lib/Target/R600/SIInstrInfo.h            4
4 files changed, 137 insertions, 1 deletion
diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp
index 911c5e55949..6d2e9575e59 100644
--- a/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/llvm/lib/Target/R600/SIISelLowering.cpp
@@ -235,6 +235,26 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setTargetDAGCombine(ISD::UINT_TO_FP);
 
+  // All memory operations. Some folding on the pointer operand is done to help
+  // matching the constant offsets in the addressing modes.
+  setTargetDAGCombine(ISD::LOAD);
+  setTargetDAGCombine(ISD::STORE);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD);
+  setTargetDAGCombine(ISD::ATOMIC_STORE);
+  setTargetDAGCombine(ISD::ATOMIC_CMP_SWAP);
+  setTargetDAGCombine(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+  setTargetDAGCombine(ISD::ATOMIC_SWAP);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_ADD);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_SUB);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_AND);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_OR);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_XOR);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_NAND);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_MIN);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_MAX);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_UMIN);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX);
+
   setSchedulingPreference(Sched::RegPressure);
 }
@@ -1296,6 +1316,56 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
   return SDValue();
 }
 
+// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
+//
+// This is a variant of
+// (mul (add x, c1), c2) -> add (mul x, c2), (mul c1, c2),
+//
+// The normal DAG combiner will do this, but only if the add has one use, since
+// otherwise it would increase the number of instructions.
+//
+// This prevents us from seeing a constant offset that can be folded into a
+// memory instruction's addressing mode. If we know the resulting add offset of
+// a pointer can be folded into an addressing offset, we can replace the pointer
+// operand with the add of the new constant offset. This eliminates one of the
+// uses, and may allow the remaining use to also be simplified.
+//
+SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
+                                               unsigned AddrSpace,
+                                               DAGCombinerInfo &DCI) const {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  if (N0.getOpcode() != ISD::ADD)
+    return SDValue();
+
+  const ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N1);
+  if (!CN1)
+    return SDValue();
+
+  const ConstantSDNode *CAdd = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+  if (!CAdd)
+    return SDValue();
+
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+      getTargetMachine().getSubtargetImpl()->getInstrInfo());
+
+  // If the resulting offset is too large, we can't fold it into the addressing
+  // mode offset.
+  APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
+  if (!TII->canFoldOffset(Offset.getZExtValue(), AddrSpace))
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc SL(N);
+  EVT VT = N->getValueType(0);
+
+  SDValue ShlX = DAG.getNode(ISD::SHL, SL, VT, N0.getOperand(0), N1);
+  SDValue COffset = DAG.getConstant(Offset, MVT::i32);
+
+  return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset);
+}
+
 SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -1348,8 +1418,45 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::UINT_TO_FP: {
     return performUCharToFloatCombine(N, DCI);
   }
-  }
+  case ISD::LOAD:
+  case ISD::STORE:
+  case ISD::ATOMIC_LOAD:
+  case ISD::ATOMIC_STORE:
+  case ISD::ATOMIC_CMP_SWAP:
+  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+  case ISD::ATOMIC_SWAP:
+  case ISD::ATOMIC_LOAD_ADD:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_NAND:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX: { // TODO: Target mem intrinsics.
+    if (DCI.isBeforeLegalize())
+      break;
+
+    MemSDNode *MemNode = cast<MemSDNode>(N);
+    SDValue Ptr = MemNode->getBasePtr();
+    // TODO: We could also do this for multiplies.
+    unsigned AS = MemNode->getAddressSpace();
+    if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
+      SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
+      if (NewPtr) {
+        SmallVector<SDValue, 8> NewOps;
+        for (unsigned I = 0, N = MemNode->getNumOperands(); I != N; ++I)
+          NewOps.push_back(MemNode->getOperand(I));
+
+        NewOps[N->getOpcode() == ISD::STORE ? 2 : 1] = NewPtr;
+        return SDValue(DAG.UpdateNodeOperands(MemNode, NewOps), 0);
+      }
+    }
+    break;
+  }
+  }
 
   return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
 }
diff --git a/llvm/lib/Target/R600/SIISelLowering.h b/llvm/lib/Target/R600/SIISelLowering.h
index 2a9aa49491f..b952bc0345a 100644
--- a/llvm/lib/Target/R600/SIISelLowering.h
+++ b/llvm/lib/Target/R600/SIISelLowering.h
@@ -56,6 +56,9 @@ class SITargetLowering : public AMDGPUTargetLowering {
   static SDValue performUCharToFloatCombine(SDNode *N,
                                             DAGCombinerInfo &DCI);
+  SDValue performSHLPtrCombine(SDNode *N,
+                               unsigned AS,
+                               DAGCombinerInfo &DCI) const;
 
 public:
   SITargetLowering(TargetMachine &tm);
diff --git a/llvm/lib/Target/R600/SIInstrInfo.cpp b/llvm/lib/Target/R600/SIInstrInfo.cpp
index 5cd8ddfaceb..3868f63b3a8 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.cpp
+++ b/llvm/lib/Target/R600/SIInstrInfo.cpp
@@ -803,6 +803,28 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
   return RI.regClassCanUseImmediate(OpInfo.RegClass);
 }
 
+bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) {
+  switch (AS) {
+  case AMDGPUAS::GLOBAL_ADDRESS: {
+    // MUBUF instructions have a 12-bit offset in bytes.
+    return isUInt<12>(OffsetSize);
+  }
+  case AMDGPUAS::CONSTANT_ADDRESS: {
+    // SMRD instructions have an 8-bit offset in dwords.
+    return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
+  }
+  case AMDGPUAS::LOCAL_ADDRESS:
+  case AMDGPUAS::REGION_ADDRESS: {
+    // The single offset versions have a 16-bit offset in bytes.
+    return isUInt<16>(OffsetSize);
+  }
+  case AMDGPUAS::PRIVATE_ADDRESS:
+    // Indirect register addressing does not use any offsets.
+  default:
+    return false;
+  }
+}
+
 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
   return AMDGPU::getVOPe32(Opcode) != -1;
 }
diff --git a/llvm/lib/Target/R600/SIInstrInfo.h b/llvm/lib/Target/R600/SIInstrInfo.h
index 9d16bc4327b..cab448ac9f5 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.h
+++ b/llvm/lib/Target/R600/SIInstrInfo.h
@@ -119,6 +119,10 @@ public:
   bool isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
                          const MachineOperand &MO) const;
 
+  /// \brief Return true if the given offset size in bytes can be folded into
+  /// the immediate offsets of a memory instruction for the given address space.
+  static bool canFoldOffset(unsigned OffsetSize, unsigned AS) LLVM_READNONE;
+
   /// \brief Return true if this 64-bit VALU instruction has a 32-bit encoding.
   /// This function will return false if you pass it a 32-bit instruction.
   bool hasVALU32BitEncoding(unsigned Opcode) const;
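
The combine added in SIISelLowering.cpp above rests on the identity (x + c1) << c2 == (x << c2) + (c1 << c2): once the pointer computation is rewritten into the second form, the constant term becomes a plain byte offset that canFoldOffset can check against the addressing-mode limits. The following is a minimal standalone sketch of that identity, not LLVM code; the index and constant values are made up for illustration.

// Sketch of the identity behind performSHLPtrCombine:
//   (shl (add x, c1), c2)  ==  (add (shl x, c2), (shl c1, c2))
// The right-hand form exposes (c1 << c2) as a constant byte offset that the
// memory instruction's addressing mode may be able to absorb.
#include <cassert>
#include <cstdint>

// Address as the DAG sees it before the combine: shift an (index + constant).
uint64_t addThenShift(uint64_t X, uint64_t C1, unsigned C2) {
  return (X + C1) << C2;
}

// Address after the combine: shift the index, then add the folded constant.
uint64_t shiftThenAdd(uint64_t X, uint64_t C1, unsigned C2) {
  return (X << C2) + (C1 << C2);
}

int main() {
  // Example: x is an element index, c1 = 3 elements, c2 = 2 (4-byte elements).
  // The folded constant offset is 3 << 2 = 12 bytes, well within a 12-bit
  // MUBUF offset.
  for (uint64_t X = 0; X < 1024; ++X)
    assert(addThenShift(X, 3, 2) == shiftThenAdd(X, 3, 2));
  return 0;
}

The legality check on the shifted constant is exactly what canFoldOffset provides, which is why performSHLPtrCombine bails out when the resulting offset no longer fits the addressing mode.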

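For reference, the per-address-space limits that SIInstrInfo::canFoldOffset encodes can be exercised in isolation. Below is a small standalone sketch that mirrors the same rules with a placeholder enum; the real code switches on numeric AMDGPUAS address-space values, and the function name here is invented for the example.

// Sketch of the offset-legality rules from SIInstrInfo::canFoldOffset.
// AddrSpace is a placeholder enum, not the real AMDGPUAS numbering.
#include <cstdint>
#include <iostream>

enum class AddrSpace { Global, Constant, Local, Private };

bool canFoldOffsetSketch(uint64_t OffsetSize, AddrSpace AS) {
  switch (AS) {
  case AddrSpace::Global:   // MUBUF: 12-bit offset in bytes.
    return OffsetSize < (1u << 12);
  case AddrSpace::Constant: // SMRD: 8-bit offset in dwords.
    return (OffsetSize % 4 == 0) && (OffsetSize / 4) < (1u << 8);
  case AddrSpace::Local:    // DS single-offset forms: 16-bit offset in bytes.
    return OffsetSize < (1u << 16);
  case AddrSpace::Private:  // Indirect register addressing has no offset field.
  default:
    return false;
  }
}

int main() {
  std::cout << canFoldOffsetSketch(4095, AddrSpace::Global)   // 1: fits in 12 bits
            << canFoldOffsetSketch(4096, AddrSpace::Global)   // 0: one byte too far
            << canFoldOffsetSketch(1020, AddrSpace::Constant) // 1: 255 dwords
            << canFoldOffsetSketch(1022, AddrSpace::Constant) // 0: not dword-aligned
            << '\n';
  return 0;
}

The REGION_ADDRESS case, which shares the 16-bit DS limit in the real code, is folded into Local here only to keep the sketch short.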
