diff options
author:    Nicolai Haehnle <nhaehnle@gmail.com>  2016-03-18 16:24:31 +0000
committer: Nicolai Haehnle <nhaehnle@gmail.com>  2016-03-18 16:24:31 +0000
commit:    ad63638f6d3ef9ec8baadd29e592c36feba523ae (patch)
tree:      1b0241f65f099b3435e33caeb635de53b5caeecd /llvm/lib/Target
parent:    3003ba00a3260bdee71dd802bcfa970c3580e6bb (diff)
download:  bcm5719-llvm-ad63638f6d3ef9ec8baadd29e592c36feba523ae.tar.gz
           bcm5719-llvm-ad63638f6d3ef9ec8baadd29e592c36feba523ae.zip
AMDGPU/SI: Add llvm.amdgcn.buffer.atomic.* intrinsics
Summary:
These intrinsics expose the BUFFER_ATOMIC_* instructions and will be used
by Mesa to implement atomics with buffer semantics. The intrinsic interface
matches that of buffer.load.format and buffer.store.format, except that the
GLC bit is not exposed (it is automatically deduced based on whether the
return value is used).
The change of hasSideEffects is required for TableGen to accept the pattern
that matches the intrinsic.
Reviewers: tstellarAMD, arsenm
Subscribers: arsenm, rivanvx, llvm-commits
Differential Revision: http://reviews.llvm.org/D18151
llvm-svn: 263791
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp |  11
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.td                |  74
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td             | 104
3 files changed, 187 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index ff5558bf4db..9cd3c39722d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -139,6 +139,17 @@ static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII, case Intrinsic::amdgcn_image_atomic_inc: case Intrinsic::amdgcn_image_atomic_dec: case Intrinsic::amdgcn_image_atomic_cmpswap: + case Intrinsic::amdgcn_buffer_atomic_swap: + case Intrinsic::amdgcn_buffer_atomic_add: + case Intrinsic::amdgcn_buffer_atomic_sub: + case Intrinsic::amdgcn_buffer_atomic_smin: + case Intrinsic::amdgcn_buffer_atomic_umin: + case Intrinsic::amdgcn_buffer_atomic_smax: + case Intrinsic::amdgcn_buffer_atomic_umax: + case Intrinsic::amdgcn_buffer_atomic_and: + case Intrinsic::amdgcn_buffer_atomic_or: + case Intrinsic::amdgcn_buffer_atomic_xor: + case Intrinsic::amdgcn_buffer_atomic_cmpswap: return true; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index cdaa662dac2..319ac8a8f21 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2814,10 +2814,25 @@ multiclass MUBUFAtomicAddr64_m <mubuf op, string opName, dag outs, dag ins, // for VI appropriately. 
} +multiclass MUBUFAtomicOther_m <mubuf op, string opName, dag outs, dag ins, + string asm, list<dag> pattern, bit is_return> { + + def "" : MUBUF_Pseudo <opName, outs, ins, pattern>, + AtomicNoRet<opName, is_return>; + + let tfe = 0 in { + let addr64 = 0 in { + def _si : MUBUF_Real_si <op, opName, outs, ins, asm>; + } + + def _vi : MUBUF_Real_vi <op, opName, outs, ins, asm>; + } +} + multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, ValueType vt, SDPatternOperator atomic> { - let mayStore = 1, mayLoad = 1, hasPostISelHook = 1 in { + let mayStore = 1, mayLoad = 1, hasPostISelHook = 1, hasSideEffects = 1 in { // No return variants let glc = 0 in { @@ -2835,6 +2850,34 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, slc:$slc), name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", [], 0 >; + + let offen = 1, idxen = 0 in { + defm _OFFEN : MUBUFAtomicOther_m < + op, name#"_offen", (outs), + (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + mbuf_offset:$offset, slc:$slc), + name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$slc", [], 0 + >; + } + + let offen = 0, idxen = 1 in { + defm _IDXEN : MUBUFAtomicOther_m < + op, name#"_idxen", (outs), + (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + mbuf_offset:$offset, slc:$slc), + name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$slc", [], 0 + >; + } + + let offen = 1, idxen = 1 in { + defm _BOTHEN : MUBUFAtomicOther_m < + op, name#"_bothen", (outs), + (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + mbuf_offset:$offset, slc:$slc), + name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$slc", + [], 0 + >; + } } // glc = 0 // Variant that return values @@ -2861,6 +2904,35 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, i1:$slc), vt:$vdata_in))], 1 >; + let offen = 1, idxen = 0 in { + defm _RTN_OFFEN : MUBUFAtomicOther_m < + op, name#"_rtn_offen", (outs rc:$vdata), + (ins rc:$vdata_in, 
VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + mbuf_offset:$offset, slc:$slc), + name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#" glc"#"$slc", + [], 1 + >; + } + + let offen = 0, idxen = 1 in { + defm _RTN_IDXEN : MUBUFAtomicOther_m < + op, name#"_rtn_idxen", (outs rc:$vdata), + (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + mbuf_offset:$offset, slc:$slc), + name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#" glc"#"$slc", + [], 1 + >; + } + + let offen = 1, idxen = 1 in { + defm _RTN_BOTHEN : MUBUFAtomicOther_m < + op, name#"_rtn_bothen", (outs rc:$vdata), + (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + mbuf_offset:$offset, slc:$slc), + name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#" glc"#"$slc", + [], 1 + >; + } } // glc = 1 } // mayStore = 1, mayLoad = 1, hasPostISelHook = 1 diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 902cb8bcb6f..7b0c6eaad0e 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1011,7 +1011,9 @@ defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper < defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic < mubuf<0x30, 0x40>, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global >; -//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <mubuf<0x31, 0x41>, "buffer_atomic_cmpswap", []>; +defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Atomic < + mubuf<0x31, 0x41>, "buffer_atomic_cmpswap", VReg_64, v2i32, null_frag +>; defm BUFFER_ATOMIC_ADD : MUBUF_Atomic < mubuf<0x32, 0x42>, "buffer_atomic_add", VGPR_32, i32, atomic_add_global >; @@ -2188,6 +2190,106 @@ def : Pat< >; //===----------------------------------------------------------------------===// +// buffer_atomic patterns +//===----------------------------------------------------------------------===// +multiclass BufferAtomicPatterns<SDPatternOperator name, string opcode> { + def : Pat< + (name i32:$vdata_in, v4i32:$rsrc, 0, + 
(MUBUFIntrinsicOffset i32:$soffset, i16:$offset), + imm:$slc), + (!cast<MUBUF>(opcode # _RTN_OFFSET) $vdata_in, $rsrc, $soffset, + (as_i16imm $offset), (as_i1imm $slc)) + >; + + def : Pat< + (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), + imm:$slc), + (!cast<MUBUF>(opcode # _RTN_IDXEN) $vdata_in, $vindex, $rsrc, $soffset, + (as_i16imm $offset), (as_i1imm $slc)) + >; + + def : Pat< + (name i32:$vdata_in, v4i32:$rsrc, 0, + (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), + imm:$slc), + (!cast<MUBUF>(opcode # _RTN_OFFEN) $vdata_in, $voffset, $rsrc, $soffset, + (as_i16imm $offset), (as_i1imm $slc)) + >; + + def : Pat< + (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), + imm:$slc), + (!cast<MUBUF>(opcode # _RTN_BOTHEN) + $vdata_in, + (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), + $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)) + >; +} + +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_swap, "BUFFER_ATOMIC_SWAP">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_add, "BUFFER_ATOMIC_ADD">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_sub, "BUFFER_ATOMIC_SUB">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_smin, "BUFFER_ATOMIC_SMIN">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_umin, "BUFFER_ATOMIC_UMIN">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_smax, "BUFFER_ATOMIC_SMAX">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_umax, "BUFFER_ATOMIC_UMAX">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_and, "BUFFER_ATOMIC_AND">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_or, "BUFFER_ATOMIC_OR">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_xor, "BUFFER_ATOMIC_XOR">; + +def : Pat< + (int_amdgcn_buffer_atomic_cmpswap + i32:$data, i32:$cmp, v4i32:$rsrc, 0, + (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), + imm:$slc), + (EXTRACT_SUBREG + 
(BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET + (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), + $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)), + sub0) +>; + +def : Pat< + (int_amdgcn_buffer_atomic_cmpswap + i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), + imm:$slc), + (EXTRACT_SUBREG + (BUFFER_ATOMIC_CMPSWAP_RTN_IDXEN + (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), + $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)), + sub0) +>; + +def : Pat< + (int_amdgcn_buffer_atomic_cmpswap + i32:$data, i32:$cmp, v4i32:$rsrc, 0, + (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), + imm:$slc), + (EXTRACT_SUBREG + (BUFFER_ATOMIC_CMPSWAP_RTN_OFFEN + (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), + $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)), + sub0) +>; + +def : Pat< + (int_amdgcn_buffer_atomic_cmpswap + i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), + imm:$slc), + (EXTRACT_SUBREG + (BUFFER_ATOMIC_CMPSWAP_RTN_BOTHEN + (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), + (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), + $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)), + sub0) +>; + + +//===----------------------------------------------------------------------===// // S_GETREG_B32 Intrinsic Pattern. //===----------------------------------------------------------------------===// def : Pat < |