Diffstat (limited to 'llvm/lib/Target/R600')
| -rw-r--r-- | llvm/lib/Target/R600/SIISelLowering.cpp | 109 |
| -rw-r--r-- | llvm/lib/Target/R600/SIISelLowering.h | 3 |
| -rw-r--r-- | llvm/lib/Target/R600/SIInstrInfo.cpp | 22 |
| -rw-r--r-- | llvm/lib/Target/R600/SIInstrInfo.h | 4 |
4 files changed, 137 insertions, 1 deletion
diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp
index 911c5e55949..6d2e9575e59 100644
--- a/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/llvm/lib/Target/R600/SIISelLowering.cpp
@@ -235,6 +235,26 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
 
   setTargetDAGCombine(ISD::UINT_TO_FP);
 
+  // All memory operations. Some folding on the pointer operand is done to help
+  // matching the constant offsets in the addressing modes.
+  setTargetDAGCombine(ISD::LOAD);
+  setTargetDAGCombine(ISD::STORE);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD);
+  setTargetDAGCombine(ISD::ATOMIC_STORE);
+  setTargetDAGCombine(ISD::ATOMIC_CMP_SWAP);
+  setTargetDAGCombine(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+  setTargetDAGCombine(ISD::ATOMIC_SWAP);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_ADD);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_SUB);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_AND);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_OR);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_XOR);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_NAND);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_MIN);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_MAX);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_UMIN);
+  setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX);
+
   setSchedulingPreference(Sched::RegPressure);
 }
 
@@ -1296,6 +1316,56 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
   return SDValue();
 }
 
+// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
+
+// This is a variant of
+// (mul (add x, c1), c2) -> add (mul x, c2), (mul c1, c2),
+//
+// The normal DAG combiner will do this, but only if the add has one use since
+// that would increase the number of instructions.
+//
+// This prevents us from seeing a constant offset that can be folded into a
+// memory instruction's addressing mode. If we know the resulting add offset of
+// a pointer can be folded into an addressing offset, we can replace the pointer
+// operand with the add of new constant offset. This eliminates one of the uses,
+// and may allow the remaining use to also be simplified.
+//
+SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
+                                               unsigned AddrSpace,
+                                               DAGCombinerInfo &DCI) const {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  if (N0.getOpcode() != ISD::ADD)
+    return SDValue();
+
+  const ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N1);
+  if (!CN1)
+    return SDValue();
+
+  const ConstantSDNode *CAdd = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+  if (!CAdd)
+    return SDValue();
+
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+    getTargetMachine().getSubtargetImpl()->getInstrInfo());
+
+  // If the resulting offset is too large, we can't fold it into the addressing
+  // mode offset.
+  APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
+  if (!TII->canFoldOffset(Offset.getZExtValue(), AddrSpace))
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc SL(N);
+  EVT VT = N->getValueType(0);
+
+  SDValue ShlX = DAG.getNode(ISD::SHL, SL, VT, N0.getOperand(0), N1);
+  SDValue COffset = DAG.getConstant(Offset, MVT::i32);
+
+  return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset);
+}
+
 SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -1348,8 +1418,45 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::UINT_TO_FP: {
     return performUCharToFloatCombine(N, DCI);
   }
-  }
+  case ISD::LOAD:
+  case ISD::STORE:
+  case ISD::ATOMIC_LOAD:
+  case ISD::ATOMIC_STORE:
+  case ISD::ATOMIC_CMP_SWAP:
+  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+  case ISD::ATOMIC_SWAP:
+  case ISD::ATOMIC_LOAD_ADD:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_NAND:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX: { // TODO: Target mem intrinsics.
+    if (DCI.isBeforeLegalize())
+      break;
+
+    MemSDNode *MemNode = cast<MemSDNode>(N);
+    SDValue Ptr = MemNode->getBasePtr();
+    // TODO: We could also do this for multiplies.
+    unsigned AS = MemNode->getAddressSpace();
+    if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
+      SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
+      if (NewPtr) {
+        SmallVector<SDValue, 8> NewOps;
+        for (unsigned I = 0, N = MemNode->getNumOperands(); I != N; ++I)
+          NewOps.push_back(MemNode->getOperand(I));
+
+        NewOps[N->getOpcode() == ISD::STORE ? 2 : 1] = NewPtr;
+        return SDValue(DAG.UpdateNodeOperands(MemNode, NewOps), 0);
+      }
+    }
+    break;
+  }
+  }
 
   return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
 }
diff --git a/llvm/lib/Target/R600/SIISelLowering.h b/llvm/lib/Target/R600/SIISelLowering.h
index 2a9aa49491f..b952bc0345a 100644
--- a/llvm/lib/Target/R600/SIISelLowering.h
+++ b/llvm/lib/Target/R600/SIISelLowering.h
@@ -56,6 +56,9 @@ class SITargetLowering : public AMDGPUTargetLowering {
   static SDValue performUCharToFloatCombine(SDNode *N,
                                             DAGCombinerInfo &DCI);
+  SDValue performSHLPtrCombine(SDNode *N,
+                               unsigned AS,
+                               DAGCombinerInfo &DCI) const;
 
 public:
   SITargetLowering(TargetMachine &tm);
diff --git a/llvm/lib/Target/R600/SIInstrInfo.cpp b/llvm/lib/Target/R600/SIInstrInfo.cpp
index 5cd8ddfaceb..3868f63b3a8 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.cpp
+++ b/llvm/lib/Target/R600/SIInstrInfo.cpp
@@ -803,6 +803,28 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
   return RI.regClassCanUseImmediate(OpInfo.RegClass);
 }
 
+bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) {
+  switch (AS) {
+  case AMDGPUAS::GLOBAL_ADDRESS: {
+    // MUBUF instructions have a 12-bit offset in bytes.
+    return isUInt<12>(OffsetSize);
+  }
+  case AMDGPUAS::CONSTANT_ADDRESS: {
+    // SMRD instructions have an 8-bit offset in dwords.
+    return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
+  }
+  case AMDGPUAS::LOCAL_ADDRESS:
+  case AMDGPUAS::REGION_ADDRESS: {
+    // The single offset versions have a 16-bit offset in bytes.
+    return isUInt<16>(OffsetSize);
+  }
+  case AMDGPUAS::PRIVATE_ADDRESS:
+    // Indirect register addressing does not use any offsets.
+  default:
+    return false;
+  }
+}
+
 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
   return AMDGPU::getVOPe32(Opcode) != -1;
 }
diff --git a/llvm/lib/Target/R600/SIInstrInfo.h b/llvm/lib/Target/R600/SIInstrInfo.h
index 9d16bc4327b..cab448ac9f5 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.h
+++ b/llvm/lib/Target/R600/SIInstrInfo.h
@@ -119,6 +119,10 @@ public:
   bool isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
                          const MachineOperand &MO) const;
 
+  /// \brief Return true if the given offset size in bytes can be folded into
+  /// the immediate offsets of a memory instruction for the given address space.
+  static bool canFoldOffset(unsigned OffsetSize, unsigned AS) LLVM_READNONE;
+
   /// \brief Return true if this 64-bit VALU instruction has a 32-bit encoding.
   /// This function will return false if you pass it a 32-bit instruction.
   bool hasVALU32BitEncoding(unsigned Opcode) const;
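
The comment block added to SIISelLowering.cpp rests on a simple algebraic identity. Below is a standalone C++ illustration (not LLVM code; the names x, c1, c2, before, after are made up for this sketch) of the re-association performSHLPtrCombine relies on, and of why the constant that falls out is c1 << c2:

// (x + c1) << c2  ==  (x << c2) + (c1 << c2)
// The right-hand form exposes (c1 << c2) as a plain constant byte offset
// that a memory instruction's addressing mode may be able to absorb.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t x  = 0x1000; // dynamic part of the index
  uint64_t c1 = 16;     // constant added before scaling
  uint64_t c2 = 2;      // shift amount, e.g. log2 of a 4-byte element size

  uint64_t before = (x + c1) << c2;          // single pointer; the add has two uses
  uint64_t after  = (x << c2) + (c1 << c2);  // shifted base plus a foldable constant

  assert(before == after);
  return 0;
}

The generic DAG combiner declines this re-association when the add has more than one use, since it would add an instruction; the patch performs it anyway, but only after canFoldOffset confirms the new constant can be absorbed by the addressing mode.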

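For reference, here is a minimal standalone sketch of the per-address-space limits enforced by the new SIInstrInfo::canFoldOffset. The AddrSpace enum and the fitsUInt helper are illustrative stand-ins for AMDGPUAS and isUInt<N>; only the bit-width rules mirror the patch.

#include <cstdint>
#include <cstdio>

// Illustrative stand-ins for the AMDGPUAS address spaces.
enum AddrSpace { Global, Constant, Local, Region, Private };

// Hypothetical helper playing the role of isUInt<N>.
static bool fitsUInt(uint64_t V, unsigned Bits) {
  return Bits >= 64 || V < (UINT64_C(1) << Bits);
}

static bool canFoldOffsetSketch(uint64_t OffsetSize, AddrSpace AS) {
  switch (AS) {
  case Global:   // MUBUF: 12-bit offset in bytes.
    return fitsUInt(OffsetSize, 12);
  case Constant: // SMRD: 8-bit offset in dwords.
    return (OffsetSize % 4 == 0) && fitsUInt(OffsetSize / 4, 8);
  case Local:
  case Region:   // Single-offset DS forms: 16-bit offset in bytes.
    return fitsUInt(OffsetSize, 16);
  case Private:  // Indirect register addressing uses no offsets.
  default:
    return false;
  }
}

int main() {
  uint64_t Offset = 16u << 2; // the c1 << c2 value from the example above: 64 bytes
  std::printf("global=%d constant=%d private=%d\n",
              canFoldOffsetSketch(Offset, Global),
              canFoldOffsetSketch(Offset, Constant),
              canFoldOffsetSketch(Offset, Private));
  return 0;
}

A 64-byte offset fits the 12-bit MUBUF and 16-bit DS fields and, being dword-aligned, the 8-bit dword SMRD field as well, so the combine can fire for global, constant, and local accesses but never for private ones.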
