diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 11 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 74 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 104 |
3 files changed, 187 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index ff5558bf4db..9cd3c39722d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -139,6 +139,17 @@ static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII, case Intrinsic::amdgcn_image_atomic_inc: case Intrinsic::amdgcn_image_atomic_dec: case Intrinsic::amdgcn_image_atomic_cmpswap: + case Intrinsic::amdgcn_buffer_atomic_swap: + case Intrinsic::amdgcn_buffer_atomic_add: + case Intrinsic::amdgcn_buffer_atomic_sub: + case Intrinsic::amdgcn_buffer_atomic_smin: + case Intrinsic::amdgcn_buffer_atomic_umin: + case Intrinsic::amdgcn_buffer_atomic_smax: + case Intrinsic::amdgcn_buffer_atomic_umax: + case Intrinsic::amdgcn_buffer_atomic_and: + case Intrinsic::amdgcn_buffer_atomic_or: + case Intrinsic::amdgcn_buffer_atomic_xor: + case Intrinsic::amdgcn_buffer_atomic_cmpswap: return true; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index cdaa662dac2..319ac8a8f21 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2814,10 +2814,25 @@ multiclass MUBUFAtomicAddr64_m <mubuf op, string opName, dag outs, dag ins, // for VI appropriately. } +multiclass MUBUFAtomicOther_m <mubuf op, string opName, dag outs, dag ins, + string asm, list<dag> pattern, bit is_return> { + + def "" : MUBUF_Pseudo <opName, outs, ins, pattern>, + AtomicNoRet<opName, is_return>; + + let tfe = 0 in { + let addr64 = 0 in { + def _si : MUBUF_Real_si <op, opName, outs, ins, asm>; + } + + def _vi : MUBUF_Real_vi <op, opName, outs, ins, asm>; + } +} + multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, ValueType vt, SDPatternOperator atomic> { - let mayStore = 1, mayLoad = 1, hasPostISelHook = 1 in { + let mayStore = 1, mayLoad = 1, hasPostISelHook = 1, hasSideEffects = 1 in { // No return variants let glc = 0 in { @@ -2835,6 +2850,34 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, slc:$slc), name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", [], 0 >; + + let offen = 1, idxen = 0 in { + defm _OFFEN : MUBUFAtomicOther_m < + op, name#"_offen", (outs), + (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + mbuf_offset:$offset, slc:$slc), + name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$slc", [], 0 + >; + } + + let offen = 0, idxen = 1 in { + defm _IDXEN : MUBUFAtomicOther_m < + op, name#"_idxen", (outs), + (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + mbuf_offset:$offset, slc:$slc), + name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$slc", [], 0 + >; + } + + let offen = 1, idxen = 1 in { + defm _BOTHEN : MUBUFAtomicOther_m < + op, name#"_bothen", (outs), + (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + mbuf_offset:$offset, slc:$slc), + name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$slc", + [], 0 + >; + } } // glc = 0 // Variant that return values @@ -2861,6 +2904,35 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc, i1:$slc), vt:$vdata_in))], 1 >; + let offen = 1, idxen = 0 in { + defm _RTN_OFFEN : MUBUFAtomicOther_m < + op, name#"_rtn_offen", (outs rc:$vdata), + (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + mbuf_offset:$offset, slc:$slc), + name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#" glc"#"$slc", + [], 1 + >; + } + + let offen = 0, idxen = 1 in { + defm _RTN_IDXEN : MUBUFAtomicOther_m < + op, name#"_rtn_idxen", (outs rc:$vdata), + (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + mbuf_offset:$offset, slc:$slc), + name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#" glc"#"$slc", + [], 1 + >; + } + + let offen = 1, idxen = 1 in { + defm _RTN_BOTHEN : MUBUFAtomicOther_m < + op, name#"_rtn_bothen", (outs rc:$vdata), + (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, + mbuf_offset:$offset, slc:$slc), + name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#" glc"#"$slc", + [], 1 + >; + } } // glc = 1 } // mayStore = 1, mayLoad = 1, hasPostISelHook = 1 diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 902cb8bcb6f..7b0c6eaad0e 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1011,7 +1011,9 @@ defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper < defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic < mubuf<0x30, 0x40>, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global >; -//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <mubuf<0x31, 0x41>, "buffer_atomic_cmpswap", []>; +defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Atomic < + mubuf<0x31, 0x41>, "buffer_atomic_cmpswap", VReg_64, v2i32, null_frag +>; defm BUFFER_ATOMIC_ADD : MUBUF_Atomic < mubuf<0x32, 0x42>, "buffer_atomic_add", VGPR_32, i32, atomic_add_global >; @@ -2188,6 +2190,106 @@ def : Pat< >; //===----------------------------------------------------------------------===// +// buffer_atomic patterns +//===----------------------------------------------------------------------===// +multiclass BufferAtomicPatterns<SDPatternOperator name, string opcode> { + def : Pat< + (name i32:$vdata_in, v4i32:$rsrc, 0, + (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), + imm:$slc), + (!cast<MUBUF>(opcode # _RTN_OFFSET) $vdata_in, $rsrc, $soffset, + (as_i16imm $offset), (as_i1imm $slc)) + >; + + def : Pat< + (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), + imm:$slc), + (!cast<MUBUF>(opcode # _RTN_IDXEN) $vdata_in, $vindex, $rsrc, $soffset, + (as_i16imm $offset), (as_i1imm $slc)) + >; + + def : Pat< + (name i32:$vdata_in, v4i32:$rsrc, 0, + (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), + imm:$slc), + (!cast<MUBUF>(opcode # _RTN_OFFEN) $vdata_in, $voffset, $rsrc, $soffset, + (as_i16imm $offset), (as_i1imm $slc)) + >; + + def : Pat< + (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), + imm:$slc), + (!cast<MUBUF>(opcode # _RTN_BOTHEN) + $vdata_in, + (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), + $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)) + >; +} + +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_swap, "BUFFER_ATOMIC_SWAP">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_add, "BUFFER_ATOMIC_ADD">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_sub, "BUFFER_ATOMIC_SUB">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_smin, "BUFFER_ATOMIC_SMIN">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_umin, "BUFFER_ATOMIC_UMIN">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_smax, "BUFFER_ATOMIC_SMAX">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_umax, "BUFFER_ATOMIC_UMAX">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_and, "BUFFER_ATOMIC_AND">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_or, "BUFFER_ATOMIC_OR">; +defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_xor, "BUFFER_ATOMIC_XOR">; + +def : Pat< + (int_amdgcn_buffer_atomic_cmpswap + i32:$data, i32:$cmp, v4i32:$rsrc, 0, + (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), + imm:$slc), + (EXTRACT_SUBREG + (BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET + (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), + $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)), + sub0) +>; + +def : Pat< + (int_amdgcn_buffer_atomic_cmpswap + i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), + imm:$slc), + (EXTRACT_SUBREG + (BUFFER_ATOMIC_CMPSWAP_RTN_IDXEN + (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), + $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)), + sub0) +>; + +def : Pat< + (int_amdgcn_buffer_atomic_cmpswap + i32:$data, i32:$cmp, v4i32:$rsrc, 0, + (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), + imm:$slc), + (EXTRACT_SUBREG + (BUFFER_ATOMIC_CMPSWAP_RTN_OFFEN + (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), + $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)), + sub0) +>; + +def : Pat< + (int_amdgcn_buffer_atomic_cmpswap + i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, + (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), + imm:$slc), + (EXTRACT_SUBREG + (BUFFER_ATOMIC_CMPSWAP_RTN_BOTHEN + (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), + (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), + $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)), + sub0) +>; + + +//===----------------------------------------------------------------------===// // S_GETREG_B32 Intrinsic Pattern. //===----------------------------------------------------------------------===// def : Pat < |