diff options
| author | Marek Olsak <marek.olsak@amd.com> | 2017-11-09 01:52:48 +0000 |
|---|---|---|
| committer | Marek Olsak <marek.olsak@amd.com> | 2017-11-09 01:52:48 +0000 |
| commit | 5cec64195ceea262f86ae3d305607eb4e7840d88 (patch) | |
| tree | 5d099f28db7c62a8b4d7d6977301f952ac46687a /llvm/lib/Target | |
| parent | 4c421a2db26753e771ca3676053352516e55e2c7 (diff) | |
| download | bcm5719-llvm-5cec64195ceea262f86ae3d305607eb4e7840d88.tar.gz bcm5719-llvm-5cec64195ceea262f86ae3d305607eb4e7840d88.zip | |
AMDGPU: Lower buffer store and atomic intrinsics manually
Summary:
Without this, SIMemoryLegalizer inserts s_waitcnt vmcnt(0) before every
buffer store and atomic instruction.
Reviewers: arsenm, nhaehnle
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D39060
llvm-svn: 317754
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 13 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 13 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 40 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 113 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 47 |
5 files changed, 206 insertions, 20 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index d502b77447d..f3b4a4f21ac 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3960,6 +3960,19 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(ATOMIC_DEC) NODE_NAME_CASE(BUFFER_LOAD) NODE_NAME_CASE(BUFFER_LOAD_FORMAT) + NODE_NAME_CASE(BUFFER_STORE) + NODE_NAME_CASE(BUFFER_STORE_FORMAT) + NODE_NAME_CASE(BUFFER_ATOMIC_SWAP) + NODE_NAME_CASE(BUFFER_ATOMIC_ADD) + NODE_NAME_CASE(BUFFER_ATOMIC_SUB) + NODE_NAME_CASE(BUFFER_ATOMIC_SMIN) + NODE_NAME_CASE(BUFFER_ATOMIC_UMIN) + NODE_NAME_CASE(BUFFER_ATOMIC_SMAX) + NODE_NAME_CASE(BUFFER_ATOMIC_UMAX) + NODE_NAME_CASE(BUFFER_ATOMIC_AND) + NODE_NAME_CASE(BUFFER_ATOMIC_OR) + NODE_NAME_CASE(BUFFER_ATOMIC_XOR) + NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP) case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break; } return nullptr; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index ba35aeb90ed..2691a1ec733 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -445,6 +445,19 @@ enum NodeType : unsigned { ATOMIC_DEC, BUFFER_LOAD, BUFFER_LOAD_FORMAT, + BUFFER_STORE, + BUFFER_STORE_FORMAT, + BUFFER_ATOMIC_SWAP, + BUFFER_ATOMIC_ADD, + BUFFER_ATOMIC_SUB, + BUFFER_ATOMIC_SMIN, + BUFFER_ATOMIC_UMIN, + BUFFER_ATOMIC_SMAX, + BUFFER_ATOMIC_UMAX, + BUFFER_ATOMIC_AND, + BUFFER_ATOMIC_OR, + BUFFER_ATOMIC_XOR, + BUFFER_ATOMIC_CMPSWAP, LAST_AMDGPU_ISD_NUMBER }; diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 351f52b6241..dc4257637a7 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -966,12 +966,12 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, >; } -defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store_format, f32, "BUFFER_STORE_FORMAT_X">; -defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store_format, v2f32, "BUFFER_STORE_FORMAT_XY">; -defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store_format, v4f32, "BUFFER_STORE_FORMAT_XYZW">; -defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store, f32, "BUFFER_STORE_DWORD">; -defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store, v2f32, "BUFFER_STORE_DWORDX2">; -defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store, v4f32, "BUFFER_STORE_DWORDX4">; +defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, f32, "BUFFER_STORE_FORMAT_X">; +defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2f32, "BUFFER_STORE_FORMAT_XY">; +defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4f32, "BUFFER_STORE_FORMAT_XYZW">; +defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, f32, "BUFFER_STORE_DWORD">; +defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2f32, "BUFFER_STORE_DWORDX2">; +defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f32, "BUFFER_STORE_DWORDX4">; //===----------------------------------------------------------------------===// // buffer_atomic patterns @@ -1013,19 +1013,19 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, string opcode> { >; } -defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_swap, "BUFFER_ATOMIC_SWAP">; -defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_add, "BUFFER_ATOMIC_ADD">; -defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_sub, "BUFFER_ATOMIC_SUB">; -defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_smin, "BUFFER_ATOMIC_SMIN">; -defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_umin, "BUFFER_ATOMIC_UMIN">; -defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_smax, "BUFFER_ATOMIC_SMAX">; -defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_umax, "BUFFER_ATOMIC_UMAX">; -defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_and, "BUFFER_ATOMIC_AND">; -defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_or, "BUFFER_ATOMIC_OR">; -defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_xor, "BUFFER_ATOMIC_XOR">; +defm : BufferAtomicPatterns<SIbuffer_atomic_swap, "BUFFER_ATOMIC_SWAP">; +defm : BufferAtomicPatterns<SIbuffer_atomic_add, "BUFFER_ATOMIC_ADD">; +defm : BufferAtomicPatterns<SIbuffer_atomic_sub, "BUFFER_ATOMIC_SUB">; +defm : BufferAtomicPatterns<SIbuffer_atomic_smin, "BUFFER_ATOMIC_SMIN">; +defm : BufferAtomicPatterns<SIbuffer_atomic_umin, "BUFFER_ATOMIC_UMIN">; +defm : BufferAtomicPatterns<SIbuffer_atomic_smax, "BUFFER_ATOMIC_SMAX">; +defm : BufferAtomicPatterns<SIbuffer_atomic_umax, "BUFFER_ATOMIC_UMAX">; +defm : BufferAtomicPatterns<SIbuffer_atomic_and, "BUFFER_ATOMIC_AND">; +defm : BufferAtomicPatterns<SIbuffer_atomic_or, "BUFFER_ATOMIC_OR">; +defm : BufferAtomicPatterns<SIbuffer_atomic_xor, "BUFFER_ATOMIC_XOR">; def : GCNPat< - (int_amdgcn_buffer_atomic_cmpswap + (SIbuffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, 0, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$slc), @@ -1037,7 +1037,7 @@ def : GCNPat< >; def : GCNPat< - (int_amdgcn_buffer_atomic_cmpswap + (SIbuffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$slc), @@ -1049,7 +1049,7 @@ def : GCNPat< >; def : GCNPat< - (int_amdgcn_buffer_atomic_cmpswap + (SIbuffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, 0, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$slc), @@ -1061,7 +1061,7 @@ def : GCNPat< >; def : GCNPat< - (int_amdgcn_buffer_atomic_cmpswap + (SIbuffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$slc), diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d1120f5e330..4428b7c0406 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4238,6 +4238,95 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL, Op->getVTList(), Ops, VT, MMO); } + case Intrinsic::amdgcn_buffer_atomic_swap: + case Intrinsic::amdgcn_buffer_atomic_add: + case Intrinsic::amdgcn_buffer_atomic_sub: + case Intrinsic::amdgcn_buffer_atomic_smin: + case Intrinsic::amdgcn_buffer_atomic_umin: + case Intrinsic::amdgcn_buffer_atomic_smax: + case Intrinsic::amdgcn_buffer_atomic_umax: + case Intrinsic::amdgcn_buffer_atomic_and: + case Intrinsic::amdgcn_buffer_atomic_or: + case Intrinsic::amdgcn_buffer_atomic_xor: { + SDValue Ops[] = { + Op.getOperand(0), // Chain + Op.getOperand(2), // vdata + Op.getOperand(3), // rsrc + Op.getOperand(4), // vindex + Op.getOperand(5), // offset + Op.getOperand(6) // slc + }; + EVT VT = Op.getOperand(3).getValueType(); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(), + MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable | + MachineMemOperand::MOVolatile, + VT.getStoreSize(), 4); + unsigned Opcode = 0; + + switch (IntrID) { + case Intrinsic::amdgcn_buffer_atomic_swap: + Opcode = AMDGPUISD::BUFFER_ATOMIC_SWAP; + break; + case Intrinsic::amdgcn_buffer_atomic_add: + Opcode = AMDGPUISD::BUFFER_ATOMIC_ADD; + break; + case Intrinsic::amdgcn_buffer_atomic_sub: + Opcode = AMDGPUISD::BUFFER_ATOMIC_SUB; + break; + case Intrinsic::amdgcn_buffer_atomic_smin: + Opcode = AMDGPUISD::BUFFER_ATOMIC_SMIN; + break; + case Intrinsic::amdgcn_buffer_atomic_umin: + Opcode = AMDGPUISD::BUFFER_ATOMIC_UMIN; + break; + case Intrinsic::amdgcn_buffer_atomic_smax: + Opcode = AMDGPUISD::BUFFER_ATOMIC_SMAX; + break; + case Intrinsic::amdgcn_buffer_atomic_umax: + Opcode = AMDGPUISD::BUFFER_ATOMIC_UMAX; + break; + case Intrinsic::amdgcn_buffer_atomic_and: + Opcode = AMDGPUISD::BUFFER_ATOMIC_AND; + break; + case Intrinsic::amdgcn_buffer_atomic_or: + Opcode = AMDGPUISD::BUFFER_ATOMIC_OR; + break; + case Intrinsic::amdgcn_buffer_atomic_xor: + Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR; + break; + default: + llvm_unreachable("unhandled atomic opcode"); + } + + return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, MMO); + } + + case Intrinsic::amdgcn_buffer_atomic_cmpswap: { + SDValue Ops[] = { + Op.getOperand(0), // Chain + Op.getOperand(2), // src + Op.getOperand(3), // cmp + Op.getOperand(4), // rsrc + Op.getOperand(5), // vindex + Op.getOperand(6), // offset + Op.getOperand(7) // slc + }; + EVT VT = Op.getOperand(4).getValueType(); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(), + MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable | + MachineMemOperand::MOVolatile, + VT.getStoreSize(), 4); + + return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL, + Op->getVTList(), Ops, VT, MMO); + } + // Basic sample. case Intrinsic::amdgcn_image_sample: case Intrinsic::amdgcn_image_sample_cl: @@ -4465,6 +4554,30 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, Op->getVTList(), Ops, VT, MMO); } + case Intrinsic::amdgcn_buffer_store: + case Intrinsic::amdgcn_buffer_store_format: { + SDValue Ops[] = { + Chain, + Op.getOperand(2), // vdata + Op.getOperand(3), // rsrc + Op.getOperand(4), // vindex + Op.getOperand(5), // offset + Op.getOperand(6), // glc + Op.getOperand(7) // slc + }; + EVT VT = Op.getOperand(3).getValueType(); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(), + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable, + VT.getStoreSize(), 4); + + unsigned Opcode = IntrinsicID == Intrinsic::amdgcn_buffer_store ? + AMDGPUISD::BUFFER_STORE : + AMDGPUISD::BUFFER_STORE_FORMAT; + return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, MMO); + } + default: return Op; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 1273f451e18..aad965dc6ea 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -93,6 +93,53 @@ def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad, def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad, [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>; +def SDTBufferStore : SDTypeProfile<0, 6, + [ // vdata + SDTCisVT<1, v4i32>, // rsrc + SDTCisVT<2, i32>, // vindex + SDTCisVT<3, i32>, // offset + SDTCisVT<4, i1>, // glc + SDTCisVT<5, i1>]>; // slc + +def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore, + [SDNPMemOperand, SDNPHasChain, SDNPMayStore]>; +def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT", SDTBufferStore, + [SDNPMemOperand, SDNPHasChain, SDNPMayStore]>; + +class SDBufferAtomic<string opcode> : SDNode <opcode, + SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, // dst + SDTCisVT<1, i32>, // vdata + SDTCisVT<2, v4i32>, // rsrc + SDTCisVT<3, i32>, // vindex + SDTCisVT<4, i32>, // offset + SDTCisVT<5, i1>]>, // slc + [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore] +>; + +def SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">; +def SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">; +def SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">; +def SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">; +def SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">; +def SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">; +def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">; +def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">; +def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">; +def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">; + +def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP", + SDTypeProfile<1, 6, + [SDTCisVT<0, i32>, // dst + SDTCisVT<1, i32>, // src + SDTCisVT<2, i32>, // cmp + SDTCisVT<3, v4i32>, // rsrc + SDTCisVT<4, i32>, // vindex + SDTCisVT<5, i32>, // offset + SDTCisVT<6, i1>]>, // slc + [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore] +>; + class SDSample<string opcode> : SDNode <opcode, SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v8i32>, SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]> |

