summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp68
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp39
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.h1
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td5
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td5
5 files changed, 19 insertions, 99 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 951db65efbb..bf3e1da5156 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -51,10 +51,10 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
void Select(SDNode *N) override;
const char *getPassName() const override;
- void PreprocessISelDAG() override;
void PostprocessISelDAG() override;
private:
+ SDValue foldFrameIndex(SDValue N) const;
bool isInlineImmediate(const SDNode *N) const;
bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
const R600InstrInfo *TII);
@@ -902,6 +902,12 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}
+SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
+ if (auto FI = dyn_cast<FrameIndexSDNode>(N))
+ return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
+ return N;
+}
+
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
SDValue &VAddr, SDValue &SOffset,
SDValue &ImmOffset) const {
@@ -921,14 +927,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
// Offsets in vaddr must be positive.
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
if (isLegalMUBUFImmOffset(C1)) {
- VAddr = N0;
+ VAddr = foldFrameIndex(N0);
ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
}
}
// (node)
- VAddr = Addr;
+ VAddr = foldFrameIndex(Addr);
ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return true;
}
@@ -1516,62 +1522,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
}
-void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
- MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
-
- // Handle the perverse case where a frame index is being stored. We don't
- // want to see multiple frame index operands on the same instruction since
- // it complicates things and violates some assumptions about frame index
- // lowering.
- for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
- I != E; ++I) {
- SDValue FI = CurDAG->getTargetFrameIndex(I, MVT::i32);
-
- // It's possible that we have a frame index defined in the function that
- // isn't used in this block.
- if (FI.use_empty())
- continue;
-
- // Skip over the AssertZext inserted during lowering.
- SDValue EffectiveFI = FI;
- auto It = FI->use_begin();
- if (It->getOpcode() == ISD::AssertZext && FI->hasOneUse()) {
- EffectiveFI = SDValue(*It, 0);
- It = EffectiveFI->use_begin();
- }
-
- for (auto It = EffectiveFI->use_begin(); !It.atEnd(); ) {
- SDUse &Use = It.getUse();
- SDNode *User = Use.getUser();
- unsigned OpIdx = It.getOperandNo();
- ++It;
-
- if (MemSDNode *M = dyn_cast<MemSDNode>(User)) {
- unsigned PtrIdx = M->getOpcode() == ISD::STORE ? 2 : 1;
- if (OpIdx == PtrIdx)
- continue;
-
- unsigned OpN = M->getNumOperands();
- SDValue NewOps[8];
-
- assert(OpN < array_lengthof(NewOps));
- for (unsigned Op = 0; Op != OpN; ++Op) {
- if (Op != OpIdx) {
- NewOps[Op] = M->getOperand(Op);
- continue;
- }
-
- MachineSDNode *Mov = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
- SDLoc(M), MVT::i32, FI);
- NewOps[Op] = SDValue(Mov, 0);
- }
-
- CurDAG->UpdateNodeOperands(M, makeArrayRef(NewOps, OpN));
- }
- }
- }
-}
-
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4570fe585d1..f8d4e6131b0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -89,7 +89,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
- setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand);
setOperationAction(ISD::SELECT, MVT::i1, Promote);
@@ -1558,7 +1557,6 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
- case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::LOAD: {
SDValue Result = LowerLOAD(Op, DAG);
@@ -1605,43 +1603,6 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) {
return nullptr;
}
-SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
-
- SDLoc SL(Op);
- FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op);
- unsigned FrameIndex = FINode->getIndex();
-
- // A FrameIndex node represents a 32-bit offset into scratch memory. If the
- // high bit of a frame index offset were to be set, this would mean that it
- // represented an offset of ~2GB * 64 = ~128GB from the start of the scratch
- // buffer, with 64 being the number of threads per wave.
- //
- // The maximum private allocation for the entire GPU is 4G, and we are
- // concerned with the largest the index could ever be for an individual
- // workitem. This will occur with the minmum dispatch size. If a program
- // requires more, the dispatch size will be reduced.
- //
- // With this limit, we can mark the high bit of the FrameIndex node as known
- // zero, which is important, because it means in most situations we can prove
- // that values derived from FrameIndex nodes are non-negative. This enables us
- // to take advantage of more addressing modes when accessing scratch buffers,
- // since for scratch reads/writes, the register offset must always be
- // positive.
-
- uint64_t MaxGPUAlloc = UINT64_C(4) * 1024 * 1024 * 1024;
-
- // XXX - It is unclear if partial dispatch works. Assume it works at half wave
- // granularity. It is probably a full wave.
- uint64_t MinGranularity = 32;
-
- unsigned KnownBits = Log2_64(MaxGPUAlloc / MinGranularity);
- EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), KnownBits);
-
- SDValue TFI = DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
- return DAG.getNode(ISD::AssertZext, SL, MVT::i32, TFI,
- DAG.getValueType(ExtVT));
-}
-
bool SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
if (Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
switch (cast<ConstantSDNode>(Intr->getOperand(1))->getZExtValue()) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 06e7da63a8f..b65f95f7854 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -33,7 +33,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFastUnsafeFDIV(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 37f8f17bff3..d04ff6a86ec 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -278,6 +278,11 @@ return CurDAG->getTargetConstant(
N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;
+def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
+ auto FI = cast<FrameIndexSDNode>(N);
+ return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
+}]>;
+
// Copied from the AArch64 backend:
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 7daa1032f05..86c3fd6815e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1582,6 +1582,11 @@ def : Pat <
>;
def : Pat <
+ (i32 frameindex:$fi),
+ (V_MOV_B32_e32 (i32 (frameindex_to_targetframeindex $fi)))
+>;
+
+def : Pat <
(i64 InlineImm<i64>:$imm),
(S_MOV_B64 InlineImm<i64>:$imm)
>;
OpenPOWER on IntegriCloud