diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-06-19 19:55:27 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-06-19 19:55:27 +0000 |
| commit | 4d55d024be80bda4750e4b81904fe1aed8df67ff (patch) | |
| tree | ec15b404608d78141987c0b8709a342dfc68adbb /llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | |
| parent | 709a769cdd4e06fc611b0ae4c76197eb71e2c4db (diff) | |
| download | bcm5719-llvm-4d55d024be80bda4750e4b81904fe1aed8df67ff.tar.gz bcm5719-llvm-4d55d024be80bda4750e4b81904fe1aed8df67ff.zip | |
Reapply "AMDGPU: Add ds_gws_init / ds_gws_barrier intrinsics"
This reapplies r363678, using the correct chain for the CopyToReg for
v0. glueCopyToM0 counterintuitively changes the operands of the
original node.
llvm-svn: 363870
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 84 |
1 files changed, 84 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 0c880a31cd1..eb1e1bef73a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -218,7 +218,9 @@ private: void SelectFMAD_FMA(SDNode *N); void SelectATOMIC_CMP_SWAP(SDNode *N); void SelectDSAppendConsume(SDNode *N, unsigned IntrID); + void SelectDS_GWS(SDNode *N, unsigned IntrID); void SelectINTRINSIC_W_CHAIN(SDNode *N); + void SelectINTRINSIC_VOID(SDNode *N); protected: // Include the pieces autogenerated from the target description. @@ -832,6 +834,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { SelectINTRINSIC_W_CHAIN(N); return; } + case ISD::INTRINSIC_VOID: { + SelectINTRINSIC_VOID(N); + return; + } } SelectCode(N); @@ -2034,6 +2040,72 @@ void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) { CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO}); } +void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) { + SDLoc SL(N); + SDValue VSrc0 = N->getOperand(2); + SDValue BaseOffset = N->getOperand(3); + int ImmOffset = 0; + MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N); + MachineMemOperand *MMO = M->getMemOperand(); + + // Don't worry if the offset ends up in a VGPR. Only one lane will have + // effect, so SIFixSGPRCopies will validly insert readfirstlane. + + // The resource id offset is computed as (<isa opaque base> + M0[21:16] + + // offset field) % 64. Some versions of the programming guide omit the m0 + // part, or claim it's from offset 0. + if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) { + // If we have a constant offset, try to use the default value for m0 as a + // base to possibly avoid setting it up. + glueCopyToM0(N, CurDAG->getTargetConstant(-1, SL, MVT::i32)); + ImmOffset = ConstOffset->getZExtValue() + 1; + } else { + if (CurDAG->isBaseWithConstantOffset(BaseOffset)) { + ImmOffset = BaseOffset.getConstantOperandVal(1); + BaseOffset = BaseOffset.getOperand(0); + } + + // Prefer to do the shift in an SGPR since it should be possible to use m0 + // as the result directly. If it's already an SGPR, it will be eliminated + // later. + SDNode *SGPROffset + = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32, + BaseOffset); + // Shift to offset in m0 + SDNode *M0Base + = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32, + SDValue(SGPROffset, 0), + CurDAG->getTargetConstant(16, SL, MVT::i32)); + glueCopyToM0(N, SDValue(M0Base, 0)); + } + + // The manual doesn't mention this, but it seems only v0 works. + SDValue V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32); + + SDValue CopyToV0 = CurDAG->getCopyToReg( + N->getOperand(0), SL, V0, VSrc0, + N->getOperand(N->getNumOperands() - 1)); + + SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32); + + // TODO: Can this just be removed from the instruction? + SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1); + + unsigned Opc = IntrID == Intrinsic::amdgcn_ds_gws_init ? + AMDGPU::DS_GWS_INIT : AMDGPU::DS_GWS_BARRIER; + + SDValue Ops[] = { + V0, + OffsetField, + GDS, + CopyToV0, // Chain + CopyToV0.getValue(1) // Glue + }; + + SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); + CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO}); +} + void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); switch (IntrID) { @@ -2044,6 +2116,18 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { SelectDSAppendConsume(N, IntrID); return; } + } + + SelectCode(N); +} + +void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) { + unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + switch (IntrID) { + case Intrinsic::amdgcn_ds_gws_init: + case Intrinsic::amdgcn_ds_gws_barrier: + SelectDS_GWS(N, IntrID); + return; default: break; } |

