diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 74 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/DSInstructions.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 11 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 5 |
5 files changed, 72 insertions, 25 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 57caff79f16..da2cf7b076d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -2089,10 +2089,39 @@ void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) { CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO}); } +static unsigned gwsIntrinToOpcode(unsigned IntrID) { + switch (IntrID) { + case Intrinsic::amdgcn_ds_gws_init: + return AMDGPU::DS_GWS_INIT; + case Intrinsic::amdgcn_ds_gws_barrier: + return AMDGPU::DS_GWS_BARRIER; + case Intrinsic::amdgcn_ds_gws_sema_v: + return AMDGPU::DS_GWS_SEMA_V; + case Intrinsic::amdgcn_ds_gws_sema_br: + return AMDGPU::DS_GWS_SEMA_BR; + case Intrinsic::amdgcn_ds_gws_sema_p: + return AMDGPU::DS_GWS_SEMA_P; + case Intrinsic::amdgcn_ds_gws_sema_release_all: + return AMDGPU::DS_GWS_SEMA_RELEASE_ALL; + default: + llvm_unreachable("not a gws intrinsic"); + } +} + void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) { + if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all && + !Subtarget->hasGWSSemaReleaseAll()) { + // Let this error. + SelectCode(N); + return; + } + + // Chain, intrinsic ID, vsrc, offset + const bool HasVSrc = N->getNumOperands() == 4; + assert(HasVSrc || N->getNumOperands() == 3); + SDLoc SL(N); - SDValue VSrc0 = N->getOperand(2); - SDValue BaseOffset = N->getOperand(3); + SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2); int ImmOffset = 0; MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N); MachineMemOperand *MMO = M->getMemOperand(); @@ -2128,28 +2157,37 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) { glueCopyToM0(N, SDValue(M0Base, 0)); } - // The manual doesn't mention this, but it seems only v0 works. - SDValue V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32); + SDValue V0; + SDValue Chain = N->getOperand(0); + SDValue Glue; + if (HasVSrc) { + SDValue VSrc0 = N->getOperand(2); + + // The manual doesn't mention this, but it seems only v0 works. + V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32); - SDValue CopyToV0 = CurDAG->getCopyToReg( - N->getOperand(0), SL, V0, VSrc0, - N->getOperand(N->getNumOperands() - 1)); + SDValue CopyToV0 = CurDAG->getCopyToReg( + N->getOperand(0), SL, V0, VSrc0, + N->getOperand(N->getNumOperands() - 1)); + Chain = CopyToV0; + Glue = CopyToV0.getValue(1); + } SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32); // TODO: Can this just be removed from the instruction? SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1); - unsigned Opc = IntrID == Intrinsic::amdgcn_ds_gws_init ? - AMDGPU::DS_GWS_INIT : AMDGPU::DS_GWS_BARRIER; + const unsigned Opc = gwsIntrinToOpcode(IntrID); + SmallVector<SDValue, 5> Ops; + if (HasVSrc) + Ops.push_back(V0); + Ops.push_back(OffsetField); + Ops.push_back(GDS); + Ops.push_back(Chain); - SDValue Ops[] = { - V0, - OffsetField, - GDS, - CopyToV0, // Chain - CopyToV0.getValue(1) // Glue - }; + if (HasVSrc) + Ops.push_back(Glue); SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO}); @@ -2175,6 +2213,10 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) { switch (IntrID) { case Intrinsic::amdgcn_ds_gws_init: case Intrinsic::amdgcn_ds_gws_barrier: + case Intrinsic::amdgcn_ds_gws_sema_v: + case Intrinsic::amdgcn_ds_gws_sema_br: + case Intrinsic::amdgcn_ds_gws_sema_p: + case Intrinsic::amdgcn_ds_gws_sema_release_all: SelectDS_GWS(N, IntrID); return; default: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index e1be0105aea..992b92d854d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -724,6 +724,11 @@ public: return getGeneration() >= GFX9; } + /// \returns if target has ds_gws_sema_release_all instruction. + bool hasGWSSemaReleaseAll() const { + return CIInsts; + } + bool hasAddNoCarry() const { return AddNoCarryInsts; } diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 0ef5d79d66a..7890fa1502e 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -557,7 +557,9 @@ let SubtargetPredicate = isGFX7Plus in { defm DS_WRAP_RTN_B32 : DS_1A2D_RET_mc<"ds_wrap_rtn_b32", VGPR_32>; defm DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET_mc<"ds_condxchg32_rtn_b64", VReg_64>; +let isConvergent = 1, usesCustomInserter = 1 in { def DS_GWS_SEMA_RELEASE_ALL : DS_GWS_0D<"ds_gws_sema_release_all">; +} let mayStore = 0 in { defm DS_READ_B96 : DS_1A_RET_mc<"ds_read_b96", VReg_96>; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index fbe88ca2246..89c797d7055 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -962,7 +962,11 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return true; } case Intrinsic::amdgcn_ds_gws_init: - case Intrinsic::amdgcn_ds_gws_barrier: { + case Intrinsic::amdgcn_ds_gws_barrier: + case Intrinsic::amdgcn_ds_gws_sema_v: + case Intrinsic::amdgcn_ds_gws_sema_br: + case Intrinsic::amdgcn_ds_gws_sema_p: + case Intrinsic::amdgcn_ds_gws_sema_release_all: { Info.opc = ISD::INTRINSIC_VOID; SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); @@ -2981,9 +2985,7 @@ SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI, std::tie(LoopBB, RemainderBB) = splitBlockForLoop(MI, *BB, true); MachineBasicBlock::iterator I = LoopBB->end(); - MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::data0); - assert(Src && "missing operand from GWS instruction"); const unsigned EncodedReg = AMDGPU::Hwreg::encodeHwreg( AMDGPU::Hwreg::ID_TRAPSTS, AMDGPU::Hwreg::OFFSET_MEM_VIOL, 1); @@ -2995,7 +2997,7 @@ SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI, // This is a pain, but we're not allowed to have physical register live-ins // yet. Insert a pair of copies if the VGPR0 hack is necessary. - if (TargetRegisterInfo::isPhysicalRegister(Src->getReg())) { + if (Src && TargetRegisterInfo::isPhysicalRegister(Src->getReg())) { unsigned Data0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); BuildMI(*BB, std::next(Prev), DL, TII->get(AMDGPU::COPY), Data0) .add(*Src); @@ -3722,6 +3724,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( case AMDGPU::DS_GWS_SEMA_V: case AMDGPU::DS_GWS_SEMA_BR: case AMDGPU::DS_GWS_SEMA_P: + case AMDGPU::DS_GWS_SEMA_RELEASE_ALL: case AMDGPU::DS_GWS_BARRIER: if (getSubtarget()->hasGWSAutoReplay()) return BB; diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 36570a5d6d5..8256caffb48 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -542,11 +542,6 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, // export.) if (AddrOpIdx != -1) { setExpScore(&Inst, TII, TRI, MRI, AddrOpIdx, CurrScore); - } else { - assert(Inst.getOpcode() == AMDGPU::DS_APPEND || - Inst.getOpcode() == AMDGPU::DS_CONSUME || - Inst.getOpcode() == AMDGPU::DS_GWS_INIT || - Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER); } if (Inst.mayStore()) { |

