diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-01-28 20:14:49 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-01-28 20:14:49 +0000 |
| commit | cdd191d9db6a17b75b5f6d6f3d1d2691ac198153 (patch) | |
| tree | 3ade69542218a8b35a4658c843389b8a1fd01274 /llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | |
| parent | e4e9ba2bea60c73a19d8a673e947f0d9b258a777 (diff) | |
| download | bcm5719-llvm-cdd191d9db6a17b75b5f6d6f3d1d2691ac198153.tar.gz bcm5719-llvm-cdd191d9db6a17b75b5f6d6f3d1d2691ac198153.zip | |
AMDGPU: Add DS append/consume intrinsics
Unlike other DS instructions, these pass the pointer in m0, so selection
has to account for whether the address is uniform or not. This
assumes the address is dynamically uniform, and just uses
readfirstlane to get a copy into an SGPR.
I don't know whether these have the same problem on SI with the 16-bit add
used for the addressing mode offset, but I've just assumed they do.
Also includes some misc. changes to avoid test differences between the
LDS and GDS versions.
llvm-svn: 352422
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 87 |
1 file changed, 72 insertions, 15 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 1bc6be45056..2cdd691fc10 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -106,12 +106,13 @@ private: MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const; - SDNode *glueCopyToM0(SDNode *N) const; + SDNode *glueCopyToM0LDSInit(SDNode *N) const; + SDNode *glueCopyToM0(SDNode *N, SDValue Val) const; const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); - bool isDSOffsetLegal(const SDValue &Base, unsigned Offset, + bool isDSOffsetLegal(SDValue Base, unsigned Offset, unsigned OffsetBits) const; bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, @@ -209,6 +210,7 @@ private: void SelectBRCOND(SDNode *N); void SelectFMAD_FMA(SDNode *N); void SelectATOMIC_CMP_SWAP(SDNode *N); + void SelectINTRINSIC_W_CHAIN(SDNode *N); protected: // Include the pieces autogenerated from the target description. 
@@ -339,29 +341,32 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, } } -SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { - if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS || - !Subtarget->ldsRequiresM0Init()) - return N; - +SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const { const SITargetLowering& Lowering = - *static_cast<const SITargetLowering*>(getTargetLowering()); + *static_cast<const SITargetLowering*>(getTargetLowering()); // Write max value to m0 before each load operation SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N), - CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); + Val); SDValue Glue = M0.getValue(1); SmallVector <SDValue, 8> Ops; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - Ops.push_back(N->getOperand(i)); - } + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + Ops.push_back(N->getOperand(i)); + Ops.push_back(Glue); return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); } +SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const { + if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS || + !Subtarget->ldsRequiresM0Init()) + return N; + return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); +} + MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm, EVT VT) const { SDNode *Lo = CurDAG->getMachineNode( @@ -472,7 +477,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { Opc == ISD::ATOMIC_LOAD_FADD || Opc == AMDGPUISD::ATOMIC_LOAD_FMIN || Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) - N = glueCopyToM0(N); + N = glueCopyToM0LDSInit(N); switch (Opc) { default: @@ -570,7 +575,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { case ISD::STORE: case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: { - N = glueCopyToM0(N); + N = glueCopyToM0LDSInit(N); break; } @@ -648,6 +653,12 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { SelectCode(N); return; } + + 
break; + } + case ISD::INTRINSIC_W_CHAIN: { + SelectINTRINSIC_W_CHAIN(N); + return; } } @@ -828,7 +839,7 @@ void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) { CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); } -bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, +bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset, unsigned OffsetBits) const { if ((OffsetBits == 16 && !isUInt<16>(Offset)) || (OffsetBits == 8 && !isUInt<8>(Offset))) @@ -1760,6 +1771,52 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { CurDAG->RemoveDeadNode(N); } +void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { + unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + if ((IntrID != Intrinsic::amdgcn_ds_append && + IntrID != Intrinsic::amdgcn_ds_consume) || + N->getValueType(0) != MVT::i32) { + SelectCode(N); + return; + } + + // The address is assumed to be uniform, so if it ends up in a VGPR, it will + // be copied to an SGPR with readfirstlane. + unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ? 
+ AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME; + + SDValue Chain = N->getOperand(0); + SDValue Ptr = N->getOperand(2); + MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N); + bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS; + + SDValue Offset; + if (CurDAG->isBaseWithConstantOffset(Ptr)) { + SDValue PtrBase = Ptr.getOperand(0); + SDValue PtrOffset = Ptr.getOperand(1); + + const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue(); + if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) { + N = glueCopyToM0(N, PtrBase); + Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32); + } + } + + if (!Offset) { + N = glueCopyToM0(N, Ptr); + Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32); + } + + SDValue Ops[] = { + Offset, + CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32), + Chain, + N->getOperand(N->getNumOperands() - 1) // New glue + }; + + CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); +} + bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const { Mods = 0; |

