Diffstat (limited to 'llvm/lib')
-rw-r--r--   llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 12
-rw-r--r--   llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp    | 56
-rw-r--r--   llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h      |  6
-rw-r--r--   llvm/lib/Target/AMDGPU/SIISelLowering.cpp     | 88
-rw-r--r--   llvm/lib/Target/AMDGPU/SIISelLowering.h       |  2
-rw-r--r--   llvm/lib/Target/AMDGPU/SIInstrInfo.td         | 26
6 files changed, 129 insertions, 61 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 4d86c17a18a..f4776adb069 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2080,15 +2080,19 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
   bool IsModified = false;
   do {
     IsModified = false;
+
     // Go over all selected nodes and try to fold them a bit more
-    for (SDNode &Node : CurDAG->allnodes()) {
-      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
+    SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
+    while (Position != CurDAG->allnodes_end()) {
+      SDNode *Node = &*Position++;
+      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
       if (!MachineNode)
         continue;
 
       SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
-      if (ResNode != &Node) {
-        ReplaceUses(&Node, ResNode);
+      if (ResNode != Node) {
+        if (ResNode)
+          ReplaceUses(Node, ResNode);
         IsModified = true;
       }
     }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 1e23aa8411a..49447862b60 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -56,15 +56,59 @@ bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
   return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
 }
 
-int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
-  switch (Channels) {
-  default: return Opcode;
-  case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
-  case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
-  case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
+static AMDGPU::Channels indexToChannel(unsigned Channel) {
+  switch (Channel) {
+  case 1:
+    return AMDGPU::Channels_1;
+  case 2:
+    return AMDGPU::Channels_2;
+  case 3:
+    return AMDGPU::Channels_3;
+  case 4:
+    return AMDGPU::Channels_4;
+  default:
+    llvm_unreachable("invalid MIMG channel");
   }
 }
 
+// FIXME: Need to handle d16 images correctly.
+static unsigned rcToChannels(unsigned RCID) {
+  switch (RCID) {
+  case AMDGPU::VGPR_32RegClassID:
+    return 1;
+  case AMDGPU::VReg_64RegClassID:
+    return 2;
+  case AMDGPU::VReg_96RegClassID:
+    return 3;
+  case AMDGPU::VReg_128RegClassID:
+    return 4;
+  default:
+    llvm_unreachable("invalid MIMG register class");
+  }
+}
+
+int AMDGPUInstrInfo::getMaskedMIMGOp(unsigned Opc,
+                                     unsigned NewChannels) const {
+  AMDGPU::Channels Channel = indexToChannel(NewChannels);
+  unsigned OrigChannels = rcToChannels(get(Opc).OpInfo[0].RegClass);
+  if (NewChannels == OrigChannels)
+    return Opc;
+
+  switch (OrigChannels) {
+  case 1:
+    return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
+  case 2:
+    return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
+  case 3:
+    return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
+  case 4:
+    return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
+  default:
+    llvm_unreachable("invalid MIMG channel");
+  }
+}
+
+
 // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
 enum SIEncodingFamily {
   SI = 0,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index f1a42b42f1f..4ab0515d5ca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -50,9 +50,9 @@ public:
   /// not exist. If Opcode is not a pseudo instruction, this is identity.
   int pseudoToMCOpcode(int Opcode) const;
 
-  /// \brief Given a MIMG \p Opcode that writes all 4 channels, return the
-  /// equivalent opcode that writes \p Channels Channels.
-  int getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const;
+  /// \brief Given a MIMG \p MI that writes any number of channels, return the
+  /// equivalent opcode that writes \p NewChannels Channels.
+  int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) const;
 };
 
 } // End llvm namespace
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index bab7739511f..18dc0fb430b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6586,9 +6586,9 @@ static unsigned SubIdx2Lane(unsigned Idx) {
 }
 
 /// \brief Adjust the writemask of MIMG instructions
-void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
-                                       SelectionDAG &DAG) const {
-  SDNode *Users[4] = { };
+SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
+                                          SelectionDAG &DAG) const {
+  SDNode *Users[4] = { nullptr };
   unsigned Lane = 0;
   unsigned DmaskIdx = (Node->getNumOperands() - Node->getNumValues() == 9) ? 2 : 3;
   unsigned OldDmask = Node->getConstantOperandVal(DmaskIdx);
@@ -6605,7 +6605,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
     // Abort if we can't understand the usage
     if (!I->isMachineOpcode() ||
         I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
-      return;
+      return Node;
 
     // Lane means which subreg of %vgpra_vgprb_vgprc_vgprd is used.
     // Note that subregs are packed, i.e. Lane==0 is the first bit set
@@ -6623,7 +6623,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
 
     // Abort if we have more than one user per component
     if (Users[Lane])
-      return;
+      return Node;
 
     Users[Lane] = *I;
     NewDmask |= 1 << Comp;
@@ -6631,25 +6631,41 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
 
   // Abort if there's no change
   if (NewDmask == OldDmask)
-    return;
+    return Node;
+
+  unsigned BitsSet = countPopulation(NewDmask);
+
+  const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+  int NewOpcode = TII->getMaskedMIMGOp(Node->getMachineOpcode(), BitsSet);
+  assert(NewOpcode != -1 &&
+         NewOpcode != static_cast<int>(Node->getMachineOpcode()) &&
+         "failed to find equivalent MIMG op");
 
   // Adjust the writemask in the node
-  std::vector<SDValue> Ops;
+  SmallVector<SDValue, 12> Ops;
   Ops.insert(Ops.end(), Node->op_begin(), Node->op_begin() + DmaskIdx);
   Ops.push_back(DAG.getTargetConstant(NewDmask, SDLoc(Node), MVT::i32));
   Ops.insert(Ops.end(), Node->op_begin() + DmaskIdx + 1, Node->op_end());
-  Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops);
-
-  // If we only got one lane, replace it with a copy
-  // (if NewDmask has only one bit set...)
-  if (NewDmask && (NewDmask & (NewDmask-1)) == 0) {
-    SDValue RC = DAG.getTargetConstant(AMDGPU::VGPR_32RegClassID, SDLoc(),
-                                       MVT::i32);
-    SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
-                                      SDLoc(), Users[Lane]->getValueType(0),
-                                      SDValue(Node, 0), RC);
+
+  MVT SVT = Node->getValueType(0).getVectorElementType().getSimpleVT();
+
+  auto NewVTList =
+    DAG.getVTList(BitsSet == 1 ?
+                  SVT : MVT::getVectorVT(SVT, BitsSet == 3 ? 4 : BitsSet),
+                  MVT::Other);
+
+  MachineSDNode *NewNode = DAG.getMachineNode(NewOpcode, SDLoc(Node),
+                                              NewVTList, Ops);
+  // Update chain.
+  DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), SDValue(NewNode, 1));
+
+  if (BitsSet == 1) {
+    assert(Node->hasNUsesOfValue(1, 0));
+    SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY,
+                                      SDLoc(Node), Users[Lane]->getValueType(0),
+                                      SDValue(NewNode, 0));
     DAG.ReplaceAllUsesWith(Users[Lane], Copy);
-    return;
+    return nullptr;
   }
 
   // Update the users of the node with the new indices
@@ -6659,7 +6675,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
       continue;
 
     SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32);
-    DAG.UpdateNodeOperands(User, User->getOperand(0), Op);
+    DAG.UpdateNodeOperands(User, SDValue(NewNode, 0), Op);
 
     switch (Idx) {
     default: break;
@@ -6668,6 +6684,9 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
     case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
     }
   }
+
+  DAG.RemoveDeadNode(Node);
+  return nullptr;
 }
 
 static bool isFrameIndexOp(SDValue Op) {
@@ -6725,14 +6744,16 @@ SDNode *SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
 }
 
 /// \brief Fold the instructions after selecting them.
+/// Returns null if users were already updated.
 SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
                                           SelectionDAG &DAG) const {
   const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
   unsigned Opcode = Node->getMachineOpcode();
 
   if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
-      !TII->isGather4(Opcode))
-    adjustWritemask(Node, DAG);
+      !TII->isGather4(Opcode)) {
+    return adjustWritemask(Node, DAG);
+  }
 
   if (Opcode == AMDGPU::INSERT_SUBREG ||
       Opcode == AMDGPU::REG_SEQUENCE) {
@@ -6810,31 +6831,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
     return;
   }
 
-  if (TII->isMIMG(MI)) {
-    unsigned VReg = MI.getOperand(0).getReg();
-    const TargetRegisterClass *RC = MRI.getRegClass(VReg);
-    // TODO: Need mapping tables to handle other cases (register classes).
-    if (RC != &AMDGPU::VReg_128RegClass)
-      return;
-
-    unsigned DmaskIdx = MI.getNumOperands() == 12 ? 3 : 4;
-    unsigned Writemask = MI.getOperand(DmaskIdx).getImm();
-    unsigned BitsSet = 0;
-    for (unsigned i = 0; i < 4; ++i)
-      BitsSet += Writemask & (1 << i) ? 1 : 0;
-    switch (BitsSet) {
-    default: return;
-    case 1: RC = &AMDGPU::VGPR_32RegClass; break;
-    case 2: RC = &AMDGPU::VReg_64RegClass; break;
-    case 3: RC = &AMDGPU::VReg_96RegClass; break;
-    }
-
-    unsigned NewOpcode = TII->getMaskedMIMGOp(MI.getOpcode(), BitsSet);
-    MI.setDesc(TII->get(NewOpcode));
-    MRI.setRegClass(VReg, RC);
-    return;
-  }
-
   // Replace unused atomics with the no return version.
   int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI.getOpcode());
   if (NoRetAtomicOp != -1) {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index f68f7dc28cd..3bf5df8c1af 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -82,7 +82,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
 
-  void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
+  SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
 
   SDValue performUCharToFloatCombine(SDNode *N,
                                      DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 1ce27c3789e..fc2d35d873a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1823,7 +1823,31 @@ def getBasicFromSDWAOp : InstrMapping {
   let ValueCols = [["Default"]];
 }
 
-def getMaskedMIMGOp : InstrMapping {
+def getMaskedMIMGOp1 : InstrMapping {
+  let FilterClass = "MIMG_Mask";
+  let RowFields = ["Op"];
+  let ColFields = ["Channels"];
+  let KeyCol = ["1"];
+  let ValueCols = [["2"], ["3"], ["4"] ];
+}
+
+def getMaskedMIMGOp2 : InstrMapping {
+  let FilterClass = "MIMG_Mask";
+  let RowFields = ["Op"];
+  let ColFields = ["Channels"];
+  let KeyCol = ["2"];
+  let ValueCols = [["1"], ["3"], ["4"] ];
+}
+
+def getMaskedMIMGOp3 : InstrMapping {
+  let FilterClass = "MIMG_Mask";
+  let RowFields = ["Op"];
+  let ColFields = ["Channels"];
+  let KeyCol = ["3"];
+  let ValueCols = [["1"], ["2"], ["4"] ];
+}
+
+def getMaskedMIMGOp4 : InstrMapping {
   let FilterClass = "MIMG_Mask";
   let RowFields = ["Op"];
   let ColFields = ["Channels"];
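Note (not part of the patch): a minimal standalone sketch of the dmask bookkeeping that adjustWritemask now performs after selection — count the lanes that survive in the new dmask, only remap the opcode when that count differs from what the destination register class implies, and give a 3-channel result a 4-element value type (the BitsSet == 3 ? 4 : BitsSet choice above). All names below are illustrative stand-ins, not LLVM API.

```cpp
#include <bitset>
#include <cstdio>

// Channel count implied by a MIMG destination register class (illustrative
// stand-ins for VGPR_32 / VReg_64 / VReg_96 / VReg_128).
enum class DstClass { V32 = 1, V64 = 2, V96 = 3, V128 = 4 };

// Number of result lanes actually used, i.e. countPopulation(NewDmask).
static unsigned channelsForDmask(unsigned Dmask) {
  return static_cast<unsigned>(std::bitset<4>(Dmask & 0xF).count());
}

// Mirror of the width decision in the patch: a 3-channel result still gets a
// 4-element vector value type; 1, 2 and 4 channels keep their own width.
static unsigned resultVectorWidth(unsigned BitsSet) {
  return BitsSet == 3 ? 4 : BitsSet;
}

int main() {
  unsigned OldDmask = 0xF; // image load originally writing all four channels
  unsigned NewDmask = 0x5; // only .x and .z are extracted by users
  unsigned BitsSet = channelsForDmask(NewDmask);

  // The opcode is swapped only when the surviving channel count no longer
  // matches the original destination class (VReg_128 -> 4 channels here).
  bool NeedsNewOpcode = BitsSet != static_cast<unsigned>(DstClass::V128);
  std::printf("dmask 0x%X -> 0x%X: %u channel(s), vector width %u, remap: %s\n",
              OldDmask, NewDmask, BitsSet, resultVectorWidth(BitsSet),
              NeedsNewOpcode ? "yes" : "no");
  return 0;
}
```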