summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp85
-rw-r--r--llvm/lib/Target/AMDGPU/DSInstructions.td6
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp18
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp28
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h37
6 files changed, 156 insertions, 21 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 0c880a31cd1..a4d096ac327 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -218,7 +218,9 @@ private:
void SelectFMAD_FMA(SDNode *N);
void SelectATOMIC_CMP_SWAP(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
+ void SelectDS_GWS(SDNode *N, unsigned IntrID);
void SelectINTRINSIC_W_CHAIN(SDNode *N);
+ void SelectINTRINSIC_VOID(SDNode *N);
protected:
// Include the pieces autogenerated from the target description.
@@ -832,6 +834,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectINTRINSIC_W_CHAIN(N);
return;
}
+ case ISD::INTRINSIC_VOID: {
+ SelectINTRINSIC_VOID(N);
+ return;
+ }
}
SelectCode(N);
@@ -2034,6 +2040,73 @@ void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}
+void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
+ SDLoc SL(N);
+ SDValue VSrc0 = N->getOperand(2);
+ SDValue BaseOffset = N->getOperand(3);
+ int ImmOffset = 0;
+ SDNode *CopyToM0;
+ MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
+ MachineMemOperand *MMO = M->getMemOperand();
+
+ // Don't worry if the offset ends up in a VGPR. Only one lane will have
+ // effect, so SIFixSGPRCopies will validly insert readfirstlane.
+
+ // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
+ // offset field) % 64. Some versions of the programming guide omit the m0
+ // part, or claim it's from offset 0.
+ if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
+ // If we have a constant offset, try to use the default value for m0 as a
+ // base to possibly avoid setting it up.
+ CopyToM0 = glueCopyToM0(N, CurDAG->getTargetConstant(-1, SL, MVT::i32));
+ ImmOffset = ConstOffset->getZExtValue() + 1;
+ } else {
+ if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
+ ImmOffset = BaseOffset.getConstantOperandVal(1);
+ BaseOffset = BaseOffset.getOperand(0);
+ }
+
+ // Prefer to do the shift in an SGPR since it should be possible to use m0
+ // as the result directly. If it's already an SGPR, it will be eliminated
+ // later.
+ SDNode *SGPROffset
+ = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
+ BaseOffset);
+ // Shift to offset in m0
+ SDNode *M0Base
+ = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
+ SDValue(SGPROffset, 0),
+ CurDAG->getTargetConstant(16, SL, MVT::i32));
+ CopyToM0 = glueCopyToM0(N, SDValue(M0Base, 0));
+ }
+
+ // The manual doesn't mention this, but it seems only v0 works.
+ SDValue V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32);
+
+ SDValue CopyToV0 = CurDAG->getCopyToReg(
+ SDValue(CopyToM0, 0), SL, V0, VSrc0,
+ N->getOperand(N->getNumOperands() - 1));
+
+ SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
+
+ // TODO: Can this just be removed from the instruction?
+ SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1);
+
+ unsigned Opc = IntrID == Intrinsic::amdgcn_ds_gws_init ?
+ AMDGPU::DS_GWS_INIT : AMDGPU::DS_GWS_BARRIER;
+
+ SDValue Ops[] = {
+ V0,
+ OffsetField,
+ GDS,
+ CopyToV0, // Chain
+ CopyToV0.getValue(1) // Glue
+ };
+
+ SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
+}
+
void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntrID) {
@@ -2044,6 +2117,18 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
SelectDSAppendConsume(N, IntrID);
return;
}
+ }
+
+ SelectCode(N);
+}
+
+void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
+ unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntrID) {
+ case Intrinsic::amdgcn_ds_gws_init:
+ case Intrinsic::amdgcn_ds_gws_barrier:
+ SelectDS_GWS(N, IntrID);
+ return;
default:
break;
}
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index db2dcff5c56..248f6599f7e 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -467,11 +467,15 @@ defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b64", VReg_64>;
defm DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>;
defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>;
-def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init">;
+let isConvergent = 1 in {
+def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init"> {
+ let mayLoad = 0;
+}
def DS_GWS_SEMA_V : DS_GWS_0D<"ds_gws_sema_v">;
def DS_GWS_SEMA_BR : DS_GWS_1D<"ds_gws_sema_br">;
def DS_GWS_SEMA_P : DS_GWS_0D<"ds_gws_sema_p">;
def DS_GWS_BARRIER : DS_GWS_1D<"ds_gws_barrier">;
+}
def DS_ADD_SRC2_U32 : DS_1A<"ds_add_src2_u32">;
def DS_SUB_SRC2_U32 : DS_1A<"ds_sub_src2_u32">;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 80acf5783ad..54cc459e148 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -961,6 +961,24 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
+ case Intrinsic::amdgcn_ds_gws_init:
+ case Intrinsic::amdgcn_ds_gws_barrier: {
+ Info.opc = ISD::INTRINSIC_VOID;
+
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.ptrVal =
+ MFI->getGWSPSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
+
+ // This is an abstract access, but we need to specify a type and size.
+ Info.memVT = MVT::i32;
+ Info.size = 4;
+ Info.align = 4;
+
+ Info.flags = MachineMemOperand::MOStore;
+ if (IntrID == Intrinsic::amdgcn_ds_gws_barrier)
+ Info.flags = MachineMemOperand::MOLoad;
+ return true;
+ }
default:
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 32694230717..29c891c72af 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -536,15 +536,19 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
// Put score on the source vgprs. If this is a store, just use those
// specific register(s).
if (TII->isDS(Inst) && (Inst.mayStore() || Inst.mayLoad())) {
+ int AddrOpIdx =
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr);
// All GDS operations must protect their address register (same as
// export.)
- if (Inst.getOpcode() != AMDGPU::DS_APPEND &&
- Inst.getOpcode() != AMDGPU::DS_CONSUME) {
- setExpScore(
- &Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr),
- CurrScore);
+ if (AddrOpIdx != -1) {
+ setExpScore(&Inst, TII, TRI, MRI, AddrOpIdx, CurrScore);
+ } else {
+ assert(Inst.getOpcode() == AMDGPU::DS_APPEND ||
+ Inst.getOpcode() == AMDGPU::DS_CONSUME ||
+ Inst.getOpcode() == AMDGPU::DS_GWS_INIT ||
+ Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER);
}
+
if (Inst.mayStore()) {
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
AMDGPU::OpName::data0) != -1) {
@@ -1407,18 +1411,6 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
ScoreBrackets.dump();
});
- // Check to see if this is a GWS instruction. If so, and if this is CI or
- // VI, then the generated code sequence will include an S_WAITCNT 0.
- // TODO: Are these the only GWS instructions?
- if (Inst.getOpcode() == AMDGPU::DS_GWS_INIT ||
- Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_V ||
- Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
- Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_P ||
- Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
- // TODO: && context->target_info->GwsRequiresMemViolTest() ) {
- ScoreBrackets.applyWaitcnt(AMDGPU::Waitcnt::allZeroExceptVsCnt());
- }
-
// TODO: Remove this work-around after fixing the scheduler and enable the
// assert above.
if (VCCZBugWorkAround) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index bb0076c6db3..56935b35734 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2547,7 +2547,8 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
// given the typical code patterns.
if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE ||
- Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP)
+ Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP ||
+ Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)
return true;
if (MI.isCall() || MI.isInlineAsm())
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index f7d31439a25..b8d1ac55adb 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -43,7 +43,8 @@ class AMDGPUPseudoSourceValue : public PseudoSourceValue {
public:
enum AMDGPUPSVKind : unsigned {
PSVBuffer = PseudoSourceValue::TargetCustom,
- PSVImage
+ PSVImage,
+ GWSResource
};
protected:
@@ -87,6 +88,30 @@ public:
}
};
+class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
+public:
+ explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
+ : AMDGPUPseudoSourceValue(GWSResource, TII) {}
+
+ static bool classof(const PseudoSourceValue *V) {
+ return V->kind() == GWSResource;
+ }
+
+ // These are inaccessible memory from IR.
+ bool isAliased(const MachineFrameInfo *) const override {
+ return false;
+ }
+
+ // These are inaccessible memory from IR.
+ bool mayAlias(const MachineFrameInfo *) const override {
+ return false;
+ }
+
+ void printCustom(raw_ostream &OS) const override {
+ OS << "GWSResource";
+ }
+};
+
namespace yaml {
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
@@ -188,6 +213,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
DenseMap<const Value *,
std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
+ std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
private:
unsigned LDSWaveSpillSize = 0;
@@ -674,6 +700,15 @@ public:
return PSV.first->second.get();
}
+ const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
+ if (!GWSResourcePSV) {
+ GWSResourcePSV =
+ llvm::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
+ }
+
+ return GWSResourcePSV.get();
+ }
+
unsigned getOccupancy() const {
return Occupancy;
}
OpenPOWER on IntegriCloud