summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp56
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h6
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp88
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.h2
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td26
6 files changed, 129 insertions, 61 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 4d86c17a18a..f4776adb069 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2080,15 +2080,19 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
bool IsModified = false;
do {
IsModified = false;
+
// Go over all selected nodes and try to fold them a bit more
- for (SDNode &Node : CurDAG->allnodes()) {
- MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
+ SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
+ while (Position != CurDAG->allnodes_end()) {
+ SDNode *Node = &*Position++;
+ MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
if (!MachineNode)
continue;
SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
- if (ResNode != &Node) {
- ReplaceUses(&Node, ResNode);
+ if (ResNode != Node) {
+ if (ResNode)
+ ReplaceUses(Node, ResNode);
IsModified = true;
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 1e23aa8411a..49447862b60 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -56,15 +56,59 @@ bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}
-int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
- switch (Channels) {
- default: return Opcode;
- case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
- case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
- case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
+static AMDGPU::Channels indexToChannel(unsigned Channel) {
+ switch (Channel) {
+ case 1:
+ return AMDGPU::Channels_1;
+ case 2:
+ return AMDGPU::Channels_2;
+ case 3:
+ return AMDGPU::Channels_3;
+ case 4:
+ return AMDGPU::Channels_4;
+ default:
+ llvm_unreachable("invalid MIMG channel");
}
}
+// FIXME: Need to handle d16 images correctly.
+static unsigned rcToChannels(unsigned RCID) {
+ switch (RCID) {
+ case AMDGPU::VGPR_32RegClassID:
+ return 1;
+ case AMDGPU::VReg_64RegClassID:
+ return 2;
+ case AMDGPU::VReg_96RegClassID:
+ return 3;
+ case AMDGPU::VReg_128RegClassID:
+ return 4;
+ default:
+ llvm_unreachable("invalid MIMG register class");
+ }
+}
+
+int AMDGPUInstrInfo::getMaskedMIMGOp(unsigned Opc,
+ unsigned NewChannels) const {
+ AMDGPU::Channels Channel = indexToChannel(NewChannels);
+ unsigned OrigChannels = rcToChannels(get(Opc).OpInfo[0].RegClass);
+ if (NewChannels == OrigChannels)
+ return Opc;
+
+ switch (OrigChannels) {
+ case 1:
+ return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
+ case 2:
+ return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
+ case 3:
+ return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
+ case 4:
+ return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
+ default:
+ llvm_unreachable("invalid MIMG channel");
+ }
+}
+
+
// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
SI = 0,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index f1a42b42f1f..4ab0515d5ca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -50,9 +50,9 @@ public:
/// not exist. If Opcode is not a pseudo instruction, this is identity.
int pseudoToMCOpcode(int Opcode) const;
- /// \brief Given a MIMG \p Opcode that writes all 4 channels, return the
- /// equivalent opcode that writes \p Channels Channels.
- int getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const;
+ /// \brief Given a MIMG opcode \p Opc that writes any number of channels,
+ /// return the equivalent opcode that writes \p NewChannels channels.
+ int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) const;
};
} // End llvm namespace
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index bab7739511f..18dc0fb430b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6586,9 +6586,9 @@ static unsigned SubIdx2Lane(unsigned Idx) {
}
/// \brief Adjust the writemask of MIMG instructions
-void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
- SelectionDAG &DAG) const {
- SDNode *Users[4] = { };
+SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
+ SelectionDAG &DAG) const {
+ SDNode *Users[4] = { nullptr };
unsigned Lane = 0;
unsigned DmaskIdx = (Node->getNumOperands() - Node->getNumValues() == 9) ? 2 : 3;
unsigned OldDmask = Node->getConstantOperandVal(DmaskIdx);
@@ -6605,7 +6605,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
// Abort if we can't understand the usage
if (!I->isMachineOpcode() ||
I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
- return;
+ return Node;
// Lane means which subreg of %vgpra_vgprb_vgprc_vgprd is used.
// Note that subregs are packed, i.e. Lane==0 is the first bit set
@@ -6623,7 +6623,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
// Abort if we have more than one user per component
if (Users[Lane])
- return;
+ return Node;
Users[Lane] = *I;
NewDmask |= 1 << Comp;
@@ -6631,25 +6631,41 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
// Abort if there's no change
if (NewDmask == OldDmask)
- return;
+ return Node;
+
+ unsigned BitsSet = countPopulation(NewDmask);
+
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+ int NewOpcode = TII->getMaskedMIMGOp(Node->getMachineOpcode(), BitsSet);
+ assert(NewOpcode != -1 &&
+ NewOpcode != static_cast<int>(Node->getMachineOpcode()) &&
+ "failed to find equivalent MIMG op");
// Adjust the writemask in the node
- std::vector<SDValue> Ops;
+ SmallVector<SDValue, 12> Ops;
Ops.insert(Ops.end(), Node->op_begin(), Node->op_begin() + DmaskIdx);
Ops.push_back(DAG.getTargetConstant(NewDmask, SDLoc(Node), MVT::i32));
Ops.insert(Ops.end(), Node->op_begin() + DmaskIdx + 1, Node->op_end());
- Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops);
-
- // If we only got one lane, replace it with a copy
- // (if NewDmask has only one bit set...)
- if (NewDmask && (NewDmask & (NewDmask-1)) == 0) {
- SDValue RC = DAG.getTargetConstant(AMDGPU::VGPR_32RegClassID, SDLoc(),
- MVT::i32);
- SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
- SDLoc(), Users[Lane]->getValueType(0),
- SDValue(Node, 0), RC);
+
+ MVT SVT = Node->getValueType(0).getVectorElementType().getSimpleVT();
+
+ auto NewVTList =
+ DAG.getVTList(BitsSet == 1 ?
+ SVT : MVT::getVectorVT(SVT, BitsSet == 3 ? 4 : BitsSet),
+ MVT::Other);
+
+ MachineSDNode *NewNode = DAG.getMachineNode(NewOpcode, SDLoc(Node),
+ NewVTList, Ops);
+ // Update chain.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), SDValue(NewNode, 1));
+
+ if (BitsSet == 1) {
+ assert(Node->hasNUsesOfValue(1, 0));
+ SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY,
+ SDLoc(Node), Users[Lane]->getValueType(0),
+ SDValue(NewNode, 0));
DAG.ReplaceAllUsesWith(Users[Lane], Copy);
- return;
+ return nullptr;
}
// Update the users of the node with the new indices
@@ -6659,7 +6675,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
continue;
SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32);
- DAG.UpdateNodeOperands(User, User->getOperand(0), Op);
+ DAG.UpdateNodeOperands(User, SDValue(NewNode, 0), Op);
switch (Idx) {
default: break;
@@ -6668,6 +6684,9 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
}
}
+
+ DAG.RemoveDeadNode(Node);
+ return nullptr;
}
static bool isFrameIndexOp(SDValue Op) {
@@ -6725,14 +6744,16 @@ SDNode *SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
}
/// \brief Fold the instructions after selecting them.
+/// Returns null if users were already updated.
SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
unsigned Opcode = Node->getMachineOpcode();
if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
- !TII->isGather4(Opcode))
- adjustWritemask(Node, DAG);
+ !TII->isGather4(Opcode)) {
+ return adjustWritemask(Node, DAG);
+ }
if (Opcode == AMDGPU::INSERT_SUBREG ||
Opcode == AMDGPU::REG_SEQUENCE) {
@@ -6810,31 +6831,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
return;
}
- if (TII->isMIMG(MI)) {
- unsigned VReg = MI.getOperand(0).getReg();
- const TargetRegisterClass *RC = MRI.getRegClass(VReg);
- // TODO: Need mapping tables to handle other cases (register classes).
- if (RC != &AMDGPU::VReg_128RegClass)
- return;
-
- unsigned DmaskIdx = MI.getNumOperands() == 12 ? 3 : 4;
- unsigned Writemask = MI.getOperand(DmaskIdx).getImm();
- unsigned BitsSet = 0;
- for (unsigned i = 0; i < 4; ++i)
- BitsSet += Writemask & (1 << i) ? 1 : 0;
- switch (BitsSet) {
- default: return;
- case 1: RC = &AMDGPU::VGPR_32RegClass; break;
- case 2: RC = &AMDGPU::VReg_64RegClass; break;
- case 3: RC = &AMDGPU::VReg_96RegClass; break;
- }
-
- unsigned NewOpcode = TII->getMaskedMIMGOp(MI.getOpcode(), BitsSet);
- MI.setDesc(TII->get(NewOpcode));
- MRI.setRegClass(VReg, RC);
- return;
- }
-
// Replace unused atomics with the no return version.
int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI.getOpcode());
if (NoRetAtomicOp != -1) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index f68f7dc28cd..3bf5df8c1af 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -82,7 +82,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
- void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
+ SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
SDValue performUCharToFloatCombine(SDNode *N,
DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 1ce27c3789e..fc2d35d873a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1823,7 +1823,31 @@ def getBasicFromSDWAOp : InstrMapping {
let ValueCols = [["Default"]];
}
-def getMaskedMIMGOp : InstrMapping {
+def getMaskedMIMGOp1 : InstrMapping {
+ let FilterClass = "MIMG_Mask";
+ let RowFields = ["Op"];
+ let ColFields = ["Channels"];
+ let KeyCol = ["1"];
+ let ValueCols = [["2"], ["3"], ["4"] ];
+}
+
+def getMaskedMIMGOp2 : InstrMapping {
+ let FilterClass = "MIMG_Mask";
+ let RowFields = ["Op"];
+ let ColFields = ["Channels"];
+ let KeyCol = ["2"];
+ let ValueCols = [["1"], ["3"], ["4"] ];
+}
+
+def getMaskedMIMGOp3 : InstrMapping {
+ let FilterClass = "MIMG_Mask";
+ let RowFields = ["Op"];
+ let ColFields = ["Channels"];
+ let KeyCol = ["3"];
+ let ValueCols = [["1"], ["2"], ["4"] ];
+}
+
+def getMaskedMIMGOp4 : InstrMapping {
let FilterClass = "MIMG_Mask";
let RowFields = ["Op"];
let ColFields = ["Channels"];
OpenPOWER on IntegriCloud