Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp   11
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.td                  74
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td              104
3 files changed, 187 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index ff5558bf4db..9cd3c39722d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -139,6 +139,17 @@ static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII,
case Intrinsic::amdgcn_image_atomic_inc:
case Intrinsic::amdgcn_image_atomic_dec:
case Intrinsic::amdgcn_image_atomic_cmpswap:
+ case Intrinsic::amdgcn_buffer_atomic_swap:
+ case Intrinsic::amdgcn_buffer_atomic_add:
+ case Intrinsic::amdgcn_buffer_atomic_sub:
+ case Intrinsic::amdgcn_buffer_atomic_smin:
+ case Intrinsic::amdgcn_buffer_atomic_umin:
+ case Intrinsic::amdgcn_buffer_atomic_smax:
+ case Intrinsic::amdgcn_buffer_atomic_umax:
+ case Intrinsic::amdgcn_buffer_atomic_and:
+ case Intrinsic::amdgcn_buffer_atomic_or:
+ case Intrinsic::amdgcn_buffer_atomic_xor:
+ case Intrinsic::amdgcn_buffer_atomic_cmpswap:
return true;
}
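
This hunk teaches the divergence analysis that the result of a buffer atomic is per-lane: each lane observes the memory value immediately before its own operation, so the returned value differs across lanes even when every input is uniform. A minimal LLVM IR sketch of why this matters (the function name and operand values are illustrative; the signature shown is the vdata/rsrc/vindex/offset/slc form that the patterns below select on):

    declare i32 @llvm.amdgcn.buffer.atomic.add(i32, <4 x i32>, i32, i32, i1)

    define amdgpu_kernel void @divergent_result(<4 x i32> %rsrc, i32 %val,
                                                i32 addrspace(1)* %out) {
      ; Kernel arguments are uniform (SGPRs), but each lane gets back its own
      ; pre-op value, so %old must be assigned a VGPR. Without the entry in
      ; isIntrinsicSourceOfDivergence, %old would wrongly be treated as
      ; uniform.
      %old = call i32 @llvm.amdgcn.buffer.atomic.add(i32 %val, <4 x i32> %rsrc,
                                                     i32 0, i32 0, i1 false)
      store i32 %old, i32 addrspace(1)* %out
      ret void
    }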
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index cdaa662dac2..319ac8a8f21 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2814,10 +2814,25 @@ multiclass MUBUFAtomicAddr64_m <mubuf op, string opName, dag outs, dag ins,
// for VI appropriately.
}
+multiclass MUBUFAtomicOther_m <mubuf op, string opName, dag outs, dag ins,
+ string asm, list<dag> pattern, bit is_return> {
+
+ def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
+ AtomicNoRet<opName, is_return>;
+
+ let tfe = 0 in {
+ let addr64 = 0 in {
+ def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
+ }
+
+ def _vi : MUBUF_Real_vi <op, opName, outs, ins, asm>;
+ }
+}
+
multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
ValueType vt, SDPatternOperator atomic> {
- let mayStore = 1, mayLoad = 1, hasPostISelHook = 1 in {
+ let mayStore = 1, mayLoad = 1, hasPostISelHook = 1, hasSideEffects = 1 in {
// No return variants
let glc = 0 in {
@@ -2835,6 +2850,34 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
slc:$slc),
name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", [], 0
>;
+
+ let offen = 1, idxen = 0 in {
+ defm _OFFEN : MUBUFAtomicOther_m <
+ op, name#"_offen", (outs),
+ (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$slc", [], 0
+ >;
+ }
+
+ let offen = 0, idxen = 1 in {
+ defm _IDXEN : MUBUFAtomicOther_m <
+ op, name#"_idxen", (outs),
+ (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$slc", [], 0
+ >;
+ }
+
+ let offen = 1, idxen = 1 in {
+ defm _BOTHEN : MUBUFAtomicOther_m <
+ op, name#"_bothen", (outs),
+ (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$slc",
+ [], 0
+ >;
+ }
} // glc = 0
// Variant that return values
@@ -2861,6 +2904,35 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
i1:$slc), vt:$vdata_in))], 1
>;
+ let offen = 1, idxen = 0 in {
+ defm _RTN_OFFEN : MUBUFAtomicOther_m <
+ op, name#"_rtn_offen", (outs rc:$vdata),
+ (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#" glc"#"$slc",
+ [], 1
+ >;
+ }
+
+ let offen = 0, idxen = 1 in {
+ defm _RTN_IDXEN : MUBUFAtomicOther_m <
+ op, name#"_rtn_idxen", (outs rc:$vdata),
+ (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#" glc"#"$slc",
+ [], 1
+ >;
+ }
+
+ let offen = 1, idxen = 1 in {
+ defm _RTN_BOTHEN : MUBUFAtomicOther_m <
+ op, name#"_rtn_bothen", (outs rc:$vdata),
+ (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, slc:$slc),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#" glc"#"$slc",
+ [], 1
+ >;
+ }
} // glc = 1
} // mayStore = 1, mayLoad = 1, hasPostISelHook = 1
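
The new MUBUFAtomicOther_m helper mirrors MUBUFAtomicAddr64_m but with addr64 = 0, and MUBUF_Atomic now instantiates offen, idxen, and bothen encodings in both no-return (glc = 0) and returning (glc = 1) flavors: offen supplies a per-lane byte offset in a VGPR, idxen a per-lane structure index, and bothen both at once in a 64-bit VGPR pair. A sketch of how the bothen form is reached from IR (the function name is hypothetical; in an amdgpu_ps function, non-inreg arguments arrive in VGPRs and are therefore divergent):

    declare i32 @llvm.amdgcn.buffer.atomic.add(i32, <4 x i32>, i32, i32, i1)

    define amdgpu_ps float @select_bothen(<4 x i32> inreg %rsrc, i32 %vindex,
                                          i32 %voffset, i32 %val) {
      ; A non-zero vindex together with a divergent offset selects
      ; _RTN_BOTHEN; the BufferAtomicPatterns multiclass in the next file
      ; packs %vindex and %voffset into a VReg_64 via REG_SEQUENCE.
      %old = call i32 @llvm.amdgcn.buffer.atomic.add(i32 %val, <4 x i32> %rsrc,
                                                     i32 %vindex, i32 %voffset,
                                                     i1 false)
      %f = bitcast i32 %old to float
      ret float %f
    }

Passing i32 0 as the vindex instead yields the _RTN_OFFEN pattern, while a uniform offset (split by MUBUFIntrinsicOffset into an SGPR soffset plus an immediate) yields _RTN_OFFSET or _RTN_IDXEN.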
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 902cb8bcb6f..7b0c6eaad0e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1011,7 +1011,9 @@ defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic <
mubuf<0x30, 0x40>, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global
>;
-//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <mubuf<0x31, 0x41>, "buffer_atomic_cmpswap", []>;
+defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Atomic <
+ mubuf<0x31, 0x41>, "buffer_atomic_cmpswap", VReg_64, v2i32, null_frag
+>;
defm BUFFER_ATOMIC_ADD : MUBUF_Atomic <
mubuf<0x32, 0x42>, "buffer_atomic_add", VGPR_32, i32, atomic_add_global
>;
@@ -2188,6 +2190,106 @@ def : Pat<
>;
//===----------------------------------------------------------------------===//
+// buffer_atomic patterns
+//===----------------------------------------------------------------------===//
+multiclass BufferAtomicPatterns<SDPatternOperator name, string opcode> {
+ def : Pat<
+ (name i32:$vdata_in, v4i32:$rsrc, 0,
+ (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
+ imm:$slc),
+ (!cast<MUBUF>(opcode # _RTN_OFFSET) $vdata_in, $rsrc, $soffset,
+ (as_i16imm $offset), (as_i1imm $slc))
+ >;
+
+ def : Pat<
+ (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
+ imm:$slc),
+ (!cast<MUBUF>(opcode # _RTN_IDXEN) $vdata_in, $vindex, $rsrc, $soffset,
+ (as_i16imm $offset), (as_i1imm $slc))
+ >;
+
+ def : Pat<
+ (name i32:$vdata_in, v4i32:$rsrc, 0,
+ (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
+ imm:$slc),
+ (!cast<MUBUF>(opcode # _RTN_OFFEN) $vdata_in, $voffset, $rsrc, $soffset,
+ (as_i16imm $offset), (as_i1imm $slc))
+ >;
+
+ def : Pat<
+ (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
+ imm:$slc),
+ (!cast<MUBUF>(opcode # _RTN_BOTHEN)
+ $vdata_in,
+ (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
+ $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc))
+ >;
+}
+
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_swap, "BUFFER_ATOMIC_SWAP">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_add, "BUFFER_ATOMIC_ADD">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_sub, "BUFFER_ATOMIC_SUB">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_smin, "BUFFER_ATOMIC_SMIN">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_umin, "BUFFER_ATOMIC_UMIN">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_smax, "BUFFER_ATOMIC_SMAX">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_umax, "BUFFER_ATOMIC_UMAX">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_and, "BUFFER_ATOMIC_AND">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_or, "BUFFER_ATOMIC_OR">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_xor, "BUFFER_ATOMIC_XOR">;
+
+def : Pat<
+ (int_amdgcn_buffer_atomic_cmpswap
+ i32:$data, i32:$cmp, v4i32:$rsrc, 0,
+ (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
+ imm:$slc),
+ (EXTRACT_SUBREG
+ (BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET
+ (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
+ $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ sub0)
+>;
+
+def : Pat<
+ (int_amdgcn_buffer_atomic_cmpswap
+ i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
+ imm:$slc),
+ (EXTRACT_SUBREG
+ (BUFFER_ATOMIC_CMPSWAP_RTN_IDXEN
+ (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
+ $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ sub0)
+>;
+
+def : Pat<
+ (int_amdgcn_buffer_atomic_cmpswap
+ i32:$data, i32:$cmp, v4i32:$rsrc, 0,
+ (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
+ imm:$slc),
+ (EXTRACT_SUBREG
+ (BUFFER_ATOMIC_CMPSWAP_RTN_OFFEN
+ (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
+ $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ sub0)
+>;
+
+def : Pat<
+ (int_amdgcn_buffer_atomic_cmpswap
+ i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
+ imm:$slc),
+ (EXTRACT_SUBREG
+ (BUFFER_ATOMIC_CMPSWAP_RTN_BOTHEN
+ (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
+ (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
+ $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ sub0)
+>;
+
+
+//===----------------------------------------------------------------------===//
// S_GETREG_B32 Intrinsic Pattern.
//===----------------------------------------------------------------------===//
def : Pat <
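
A note on the BUFFER_ATOMIC_CMPSWAP patterns above: the instruction is defined with null_frag because there is no generic SDNode to match, so the intrinsic is lowered by hand. $data and $cmp are packed into the two halves of a 64-bit register pair with REG_SEQUENCE, the hardware returns the loaded value in the low half, and EXTRACT_SUBREG sub0 recovers it. A usage sketch (the function name is hypothetical; the intrinsic operands are src, cmp, rsrc, vindex, offset, slc):

    declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32,
                                                   i32, i1)

    define amdgpu_ps float @cas(<4 x i32> inreg %rsrc, i32 %data, i32 %cmp) {
      ; Writes %data if the current memory value equals %cmp; %old is the
      ; value observed before the operation, taken from sub0 of the pair.
      %old = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %data, i32 %cmp,
                                                         <4 x i32> %rsrc,
                                                         i32 0, i32 0, i1 false)
      %f = bitcast i32 %old to float
      ret float %f
    }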