diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 68 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 39 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 5 |
5 files changed, 19 insertions, 99 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 951db65efbb..bf3e1da5156 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -51,10 +51,10 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; void Select(SDNode *N) override; const char *getPassName() const override; - void PreprocessISelDAG() override; void PostprocessISelDAG() override; private: + SDValue foldFrameIndex(SDValue N) const; bool isInlineImmediate(const SDNode *N) const; bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs, const R600InstrInfo *TII); @@ -902,6 +902,12 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE); } +SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { + if (auto FI = dyn_cast<FrameIndexSDNode>(N)) + return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)); + return N; +} + bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, SDValue &VAddr, SDValue &SOffset, SDValue &ImmOffset) const { @@ -921,14 +927,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, // Offsets in vaddr must be positive. ConstantSDNode *C1 = cast<ConstantSDNode>(N1); if (isLegalMUBUFImmOffset(C1)) { - VAddr = N0; + VAddr = foldFrameIndex(N0); ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); return true; } } // (node) - VAddr = Addr; + VAddr = foldFrameIndex(Addr); ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); return true; } @@ -1516,62 +1522,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, return SelectVOP3Mods(In, Src, SrcMods); } -void AMDGPUDAGToDAGISel::PreprocessISelDAG() { - MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo(); - - // Handle the perverse case where a frame index is being stored. We don't - // want to see multiple frame index operands on the same instruction since - // it complicates things and violates some assumptions about frame index - // lowering. - for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); - I != E; ++I) { - SDValue FI = CurDAG->getTargetFrameIndex(I, MVT::i32); - - // It's possible that we have a frame index defined in the function that - // isn't used in this block. - if (FI.use_empty()) - continue; - - // Skip over the AssertZext inserted during lowering. - SDValue EffectiveFI = FI; - auto It = FI->use_begin(); - if (It->getOpcode() == ISD::AssertZext && FI->hasOneUse()) { - EffectiveFI = SDValue(*It, 0); - It = EffectiveFI->use_begin(); - } - - for (auto It = EffectiveFI->use_begin(); !It.atEnd(); ) { - SDUse &Use = It.getUse(); - SDNode *User = Use.getUser(); - unsigned OpIdx = It.getOperandNo(); - ++It; - - if (MemSDNode *M = dyn_cast<MemSDNode>(User)) { - unsigned PtrIdx = M->getOpcode() == ISD::STORE ? 2 : 1; - if (OpIdx == PtrIdx) - continue; - - unsigned OpN = M->getNumOperands(); - SDValue NewOps[8]; - - assert(OpN < array_lengthof(NewOps)); - for (unsigned Op = 0; Op != OpN; ++Op) { - if (Op != OpIdx) { - NewOps[Op] = M->getOperand(Op); - continue; - } - - MachineSDNode *Mov = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, - SDLoc(M), MVT::i32, FI); - NewOps[Op] = SDValue(Mov, 0); - } - - CurDAG->UpdateNodeOperands(M, makeArrayRef(NewOps, OpN)); - } - } - } -} - void AMDGPUDAGToDAGISel::PostprocessISelDAG() { const AMDGPUTargetLowering& Lowering = *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 4570fe585d1..f8d4e6131b0 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -89,7 +89,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - setOperationAction(ISD::FrameIndex, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand); setOperationAction(ISD::SELECT, MVT::i1, Promote); @@ -1558,7 +1557,6 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); - case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::LOAD: { SDValue Result = LowerLOAD(Op, DAG); @@ -1605,43 +1603,6 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) { return nullptr; } -SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const { - - SDLoc SL(Op); - FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op); - unsigned FrameIndex = FINode->getIndex(); - - // A FrameIndex node represents a 32-bit offset into scratch memory. If the - // high bit of a frame index offset were to be set, this would mean that it - // represented an offset of ~2GB * 64 = ~128GB from the start of the scratch - // buffer, with 64 being the number of threads per wave. - // - // The maximum private allocation for the entire GPU is 4G, and we are - // concerned with the largest the index could ever be for an individual - // workitem. This will occur with the minmum dispatch size. If a program - // requires more, the dispatch size will be reduced. - // - // With this limit, we can mark the high bit of the FrameIndex node as known - // zero, which is important, because it means in most situations we can prove - // that values derived from FrameIndex nodes are non-negative. This enables us - // to take advantage of more addressing modes when accessing scratch buffers, - // since for scratch reads/writes, the register offset must always be - // positive. - - uint64_t MaxGPUAlloc = UINT64_C(4) * 1024 * 1024 * 1024; - - // XXX - It is unclear if partial dispatch works. Assume it works at half wave - // granularity. It is probably a full wave. - uint64_t MinGranularity = 32; - - unsigned KnownBits = Log2_64(MaxGPUAlloc / MinGranularity); - EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), KnownBits); - - SDValue TFI = DAG.getTargetFrameIndex(FrameIndex, MVT::i32); - return DAG.getNode(ISD::AssertZext, SL, MVT::i32, TFI, - DAG.getValueType(ExtVT)); -} - bool SITargetLowering::isCFIntrinsic(const SDNode *Intr) const { if (Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN) { switch (cast<ConstantSDNode>(Intr->getOperand(1))->getZExtValue()) { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 06e7da63a8f..b65f95f7854 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -33,7 +33,6 @@ class SITargetLowering final : public AMDGPUTargetLowering { SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFastUnsafeFDIV(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 37f8f17bff3..d04ff6a86ec 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -278,6 +278,11 @@ return CurDAG->getTargetConstant( N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32); }]>; +def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{ + auto FI = cast<FrameIndexSDNode>(N); + return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32); +}]>; + // Copied from the AArch64 backend: def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{ return CurDAG->getTargetConstant( diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 7daa1032f05..86c3fd6815e 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1582,6 +1582,11 @@ def : Pat < >; def : Pat < + (i32 frameindex:$fi), + (V_MOV_B32_e32 (i32 (frameindex_to_targetframeindex $fi))) +>; + +def : Pat < (i64 InlineImm<i64>:$imm), (S_MOV_B64 InlineImm<i64>:$imm) >; |