diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 85 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/DSInstructions.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 18 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 28 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 37 |
6 files changed, 156 insertions, 21 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 0c880a31cd1..a4d096ac327 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -218,7 +218,9 @@ private:
   void SelectFMAD_FMA(SDNode *N);
   void SelectATOMIC_CMP_SWAP(SDNode *N);
   void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
+  void SelectDS_GWS(SDNode *N, unsigned IntrID);
   void SelectINTRINSIC_W_CHAIN(SDNode *N);
+  void SelectINTRINSIC_VOID(SDNode *N);
 
 protected:
   // Include the pieces autogenerated from the target description.
@@ -832,6 +834,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
     SelectINTRINSIC_W_CHAIN(N);
     return;
   }
+  case ISD::INTRINSIC_VOID: {
+    SelectINTRINSIC_VOID(N);
+    return;
+  }
   }
 
   SelectCode(N);
@@ -2034,6 +2040,73 @@ void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
 }
 
+void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
+  SDLoc SL(N);
+  SDValue VSrc0 = N->getOperand(2);
+  SDValue BaseOffset = N->getOperand(3);
+  int ImmOffset = 0;
+  SDNode *CopyToM0;
+  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
+  MachineMemOperand *MMO = M->getMemOperand();
+
+  // Don't worry if the offset ends up in a VGPR. Only one lane will have
+  // effect, so SIFixSGPRCopies will validly insert readfirstlane.
+
+  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
+  // offset field) % 64. Some versions of the programming guide omit the m0
+  // part, or claim it's from offset 0.
+  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
+    // If we have a constant offset, try to use the default value for m0 as a
+    // base to possibly avoid setting it up.
+    CopyToM0 = glueCopyToM0(N, CurDAG->getTargetConstant(-1, SL, MVT::i32));
+    ImmOffset = ConstOffset->getZExtValue() + 1;
+  } else {
+    if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
+      ImmOffset = BaseOffset.getConstantOperandVal(1);
+      BaseOffset = BaseOffset.getOperand(0);
+    }
+
+    // Prefer to do the shift in an SGPR since it should be possible to use m0
+    // as the result directly. If it's already an SGPR, it will be eliminated
+    // later.
+    SDNode *SGPROffset
+      = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
+                               BaseOffset);
+    // Shift to offset in m0
+    SDNode *M0Base
+      = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
+                               SDValue(SGPROffset, 0),
+                               CurDAG->getTargetConstant(16, SL, MVT::i32));
+    CopyToM0 = glueCopyToM0(N, SDValue(M0Base, 0));
+  }
+
+  // The manual doesn't mention this, but it seems only v0 works.
+  SDValue V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32);
+
+  SDValue CopyToV0 = CurDAG->getCopyToReg(
+    SDValue(CopyToM0, 0), SL, V0, VSrc0,
+    N->getOperand(N->getNumOperands() - 1));
+
+  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
+
+  // TODO: Can this just be removed from the instruction?
+  SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1);
+
+  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_gws_init ?
+    AMDGPU::DS_GWS_INIT : AMDGPU::DS_GWS_BARRIER;
+
+  SDValue Ops[] = {
+    V0,
+    OffsetField,
+    GDS,
+    CopyToV0, // Chain
+    CopyToV0.getValue(1) // Glue
+  };
+
+  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
+  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
+}
+
 void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
   unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
   switch (IntrID) {
@@ -2044,6 +2117,18 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
     SelectDSAppendConsume(N, IntrID);
     return;
   }
+  }
+
+  SelectCode(N);
+}
+
+void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
+  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+  switch (IntrID) {
+  case Intrinsic::amdgcn_ds_gws_init:
+  case Intrinsic::amdgcn_ds_gws_barrier:
+    SelectDS_GWS(N, IntrID);
+    return;
   default:
     break;
   }
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index db2dcff5c56..248f6599f7e 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -467,11 +467,15 @@ defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b64", VReg_64>;
 defm DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>;
 defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>;
 
-def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init">;
+let isConvergent = 1 in {
+def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init"> {
+  let mayLoad = 0;
+}
 def DS_GWS_SEMA_V : DS_GWS_0D<"ds_gws_sema_v">;
 def DS_GWS_SEMA_BR : DS_GWS_1D<"ds_gws_sema_br">;
 def DS_GWS_SEMA_P : DS_GWS_0D<"ds_gws_sema_p">;
 def DS_GWS_BARRIER : DS_GWS_1D<"ds_gws_barrier">;
+}
 
 def DS_ADD_SRC2_U32 : DS_1A<"ds_add_src2_u32">;
 def DS_SUB_SRC2_U32 : DS_1A<"ds_sub_src2_u32">;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 80acf5783ad..54cc459e148 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -961,6 +961,24 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 
     return true;
   }
+  case Intrinsic::amdgcn_ds_gws_init:
+  case Intrinsic::amdgcn_ds_gws_barrier: {
+    Info.opc = ISD::INTRINSIC_VOID;
+
+    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+    Info.ptrVal =
+        MFI->getGWSPSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
+
+    // This is an abstract access, but we need to specify a type and size.
+    Info.memVT = MVT::i32;
+    Info.size = 4;
+    Info.align = 4;
+
+    Info.flags = MachineMemOperand::MOStore;
+    if (IntrID == Intrinsic::amdgcn_ds_gws_barrier)
+      Info.flags = MachineMemOperand::MOLoad;
+    return true;
+  }
   default:
     return false;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 32694230717..29c891c72af 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -536,15 +536,19 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
     // Put score on the source vgprs. If this is a store, just use those
     // specific register(s).
     if (TII->isDS(Inst) && (Inst.mayStore() || Inst.mayLoad())) {
+      int AddrOpIdx =
+          AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr);
       // All GDS operations must protect their address register (same as
       // export.)
-      if (Inst.getOpcode() != AMDGPU::DS_APPEND &&
-          Inst.getOpcode() != AMDGPU::DS_CONSUME) {
-        setExpScore(
-            &Inst, TII, TRI, MRI,
-            AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr),
-            CurrScore);
+      if (AddrOpIdx != -1) {
+        setExpScore(&Inst, TII, TRI, MRI, AddrOpIdx, CurrScore);
+      } else {
+        assert(Inst.getOpcode() == AMDGPU::DS_APPEND ||
+               Inst.getOpcode() == AMDGPU::DS_CONSUME ||
+               Inst.getOpcode() == AMDGPU::DS_GWS_INIT ||
+               Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER);
       }
+
       if (Inst.mayStore()) {
         if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                        AMDGPU::OpName::data0) != -1) {
@@ -1407,18 +1411,6 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
       ScoreBrackets.dump();
     });
 
-    // Check to see if this is a GWS instruction. If so, and if this is CI or
-    // VI, then the generated code sequence will include an S_WAITCNT 0.
-    // TODO: Are these the only GWS instructions?
-    if (Inst.getOpcode() == AMDGPU::DS_GWS_INIT ||
-        Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_V ||
-        Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
-        Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_P ||
-        Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
-      // TODO: && context->target_info->GwsRequiresMemViolTest() ) {
-      ScoreBrackets.applyWaitcnt(AMDGPU::Waitcnt::allZeroExceptVsCnt());
-    }
-
     // TODO: Remove this work-around after fixing the scheduler and enable the
     // assert above.
     if (VCCZBugWorkAround) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index bb0076c6db3..56935b35734 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2547,7 +2547,8 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
   // given the typical code patterns.
   if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
       Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE ||
-      Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP)
+      Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP ||
+      Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)
     return true;
 
   if (MI.isCall() || MI.isInlineAsm())
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index f7d31439a25..b8d1ac55adb 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -43,7 +43,8 @@ class AMDGPUPseudoSourceValue : public PseudoSourceValue {
 public:
   enum AMDGPUPSVKind : unsigned {
     PSVBuffer = PseudoSourceValue::TargetCustom,
-    PSVImage
+    PSVImage,
+    GWSResource
   };
 
 protected:
@@ -87,6 +88,30 @@ public:
   }
 };
 
+class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
+public:
+  explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
+      : AMDGPUPseudoSourceValue(GWSResource, TII) {}
+
+  static bool classof(const PseudoSourceValue *V) {
+    return V->kind() == GWSResource;
+  }
+
+  // These are inaccessible memory from IR.
+  bool isAliased(const MachineFrameInfo *) const override {
+    return false;
+  }
+
+  // These are inaccessible memory from IR.
+  bool mayAlias(const MachineFrameInfo *) const override {
+    return false;
+  }
+
+  void printCustom(raw_ostream &OS) const override {
+    OS << "GWSResource";
+  }
+};
+
 namespace yaml {
 
 struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
@@ -188,6 +213,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
            std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
   DenseMap<const Value *,
            std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
+  std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
 
 private:
   unsigned LDSWaveSpillSize = 0;
@@ -674,6 +700,15 @@ public:
     return PSV.first->second.get();
   }
 
+  const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
+    if (!GWSResourcePSV) {
+      GWSResourcePSV =
+          llvm::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
+    }
+
+    return GWSResourcePSV.get();
+  }
+
   unsigned getOccupancy() const {
     return Occupancy;
   }