diff options
-rw-r--r-- | llvm/include/llvm/Target/TargetSelectionDAG.td | 10 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 70 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 48 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/DSInstructions.td | 12 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/EvergreenInstructions.td | 60 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/FLATInstructions.td | 148 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 61 |
7 files changed, 215 insertions, 194 deletions
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 1504a5becd8..12a7ecc0412 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -1369,26 +1369,26 @@ multiclass ternary_atomic_op_ord<SDNode atomic_op> { } } -multiclass binary_atomic_op<SDNode atomic_op> { +multiclass binary_atomic_op<SDNode atomic_op, bit IsInt = 1> { def _8 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = 1; - let MemoryVT = i8; + let MemoryVT = !if(IsInt, i8, ?); } def _16 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = 1; - let MemoryVT = i16; + let MemoryVT = !if(IsInt, i16, f16); } def _32 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = 1; - let MemoryVT = i32; + let MemoryVT = !if(IsInt, i32, f32); } def _64 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { let IsAtomic = 1; - let MemoryVT = i64; + let MemoryVT = !if(IsInt, i64, f64); } defm NAME#_8 : binary_atomic_op_ord<atomic_op>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 83a2f5265db..d4d2e55971b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -466,6 +466,35 @@ defm atomic_store_#as : binary_atomic_op<atomic_store>; } // End foreach AddrSpace +multiclass ret_noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> { + foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in { + let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in { + defm "_"#as : binary_atomic_op<atomic_op, IsInt>; + + let PredicateCode = [{return (SDValue(N, 0).use_empty());}] in { + defm "_"#as#"_noret" : binary_atomic_op<atomic_op, IsInt>; + } + + let PredicateCode = [{return !(SDValue(N, 0).use_empty());}] in { + defm "_"#as#"_ret" : binary_atomic_op<atomic_op, IsInt>; + } + } + } +} + +defm atomic_swap : ret_noret_binary_atomic_op<atomic_swap>; +defm atomic_load_add : ret_noret_binary_atomic_op<atomic_load_add>; +defm atomic_load_and : ret_noret_binary_atomic_op<atomic_load_and>; +defm atomic_load_max : ret_noret_binary_atomic_op<atomic_load_max>; +defm atomic_load_min : ret_noret_binary_atomic_op<atomic_load_min>; +defm atomic_load_or : ret_noret_binary_atomic_op<atomic_load_or>; +defm atomic_load_sub : ret_noret_binary_atomic_op<atomic_load_sub>; +defm atomic_load_umax : ret_noret_binary_atomic_op<atomic_load_umax>; +defm atomic_load_umin : ret_noret_binary_atomic_op<atomic_load_umin>; +defm atomic_load_xor : ret_noret_binary_atomic_op<atomic_load_xor>; +defm atomic_load_fadd : ret_noret_binary_atomic_op<atomic_load_fadd, 0>; + + def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress; def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress; @@ -521,18 +550,6 @@ class region_binary_atomic_op<SDNode atomic_op> : }]>; -def atomic_swap_local : local_binary_atomic_op<atomic_swap>; -def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>; -def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>; -def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>; -def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>; -def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>; -def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>; -def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>; -def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>; -def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>; -def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>; - def mskor_global : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; @@ -552,38 +569,15 @@ class AtomicCmpSwapRegion <SDNode cmp_swap_node> : PatFrag< return AN->getAddressSpace() == AMDGPUAS::REGION_ADDRESS; }]>; -def atomic_cmp_swap_local : AtomicCmpSwapLocal <atomic_cmp_swap>; +// FIXME: Actually set MemoryVT +def atomic_cmp_swap_local_32 : AtomicCmpSwapLocal <atomic_cmp_swap>; +def atomic_cmp_swap_local_64 : AtomicCmpSwapLocal <atomic_cmp_swap>; class global_binary_atomic_op_frag<SDNode atomic_op> : PatFrag< (ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>; -multiclass global_binary_atomic_op<SDNode atomic_op> { - def "" : global_binary_atomic_op_frag<atomic_op>; - - def _noret : PatFrag< - (ops node:$ptr, node:$value), - (atomic_op node:$ptr, node:$value), - [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; - - def _ret : PatFrag< - (ops node:$ptr, node:$value), - (atomic_op node:$ptr, node:$value), - [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>; -} - -defm atomic_swap_global : global_binary_atomic_op<atomic_swap>; -defm atomic_add_global : global_binary_atomic_op<atomic_load_add>; -defm atomic_and_global : global_binary_atomic_op<atomic_load_and>; -defm atomic_max_global : global_binary_atomic_op<atomic_load_max>; -defm atomic_min_global : global_binary_atomic_op<atomic_load_min>; -defm atomic_or_global : global_binary_atomic_op<atomic_load_or>; -defm atomic_sub_global : global_binary_atomic_op<atomic_load_sub>; -defm atomic_umax_global : global_binary_atomic_op<atomic_load_umax>; -defm atomic_umin_global : global_binary_atomic_op<atomic_load_umin>; -defm atomic_xor_global : global_binary_atomic_op<atomic_load_xor>; - // Legacy. def AMDGPUatomic_cmp_swap_global : PatFrag< (ops node:$ptr, node:$value), diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 854d4c573bf..bd2a2d834cf 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -896,82 +896,82 @@ defm BUFFER_STORE_DWORDX4 : MUBUF_Pseudo_Stores < "buffer_store_dwordx4", VReg_128, v4i32, store_global >; defm BUFFER_ATOMIC_SWAP : MUBUF_Pseudo_Atomics < - "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global + "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global_32 >; defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Pseudo_Atomics < "buffer_atomic_cmpswap", VReg_64, v2i32, null_frag >; defm BUFFER_ATOMIC_ADD : MUBUF_Pseudo_Atomics < - "buffer_atomic_add", VGPR_32, i32, atomic_add_global + "buffer_atomic_add", VGPR_32, i32, atomic_load_add_global_32 >; defm BUFFER_ATOMIC_SUB : MUBUF_Pseudo_Atomics < - "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global + "buffer_atomic_sub", VGPR_32, i32, atomic_load_sub_global_32 >; defm BUFFER_ATOMIC_SMIN : MUBUF_Pseudo_Atomics < - "buffer_atomic_smin", VGPR_32, i32, atomic_min_global + "buffer_atomic_smin", VGPR_32, i32, atomic_load_min_global_32 >; defm BUFFER_ATOMIC_UMIN : MUBUF_Pseudo_Atomics < - "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global + "buffer_atomic_umin", VGPR_32, i32, atomic_load_umin_global_32 >; defm BUFFER_ATOMIC_SMAX : MUBUF_Pseudo_Atomics < - "buffer_atomic_smax", VGPR_32, i32, atomic_max_global + "buffer_atomic_smax", VGPR_32, i32, atomic_load_max_global_32 >; defm BUFFER_ATOMIC_UMAX : MUBUF_Pseudo_Atomics < - "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global + "buffer_atomic_umax", VGPR_32, i32, atomic_load_umax_global_32 >; defm BUFFER_ATOMIC_AND : MUBUF_Pseudo_Atomics < - "buffer_atomic_and", VGPR_32, i32, atomic_and_global + "buffer_atomic_and", VGPR_32, i32, atomic_load_and_global_32 >; defm BUFFER_ATOMIC_OR : MUBUF_Pseudo_Atomics < - "buffer_atomic_or", VGPR_32, i32, atomic_or_global + "buffer_atomic_or", VGPR_32, i32, atomic_load_or_global_32 >; defm BUFFER_ATOMIC_XOR : MUBUF_Pseudo_Atomics < - "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global + "buffer_atomic_xor", VGPR_32, i32, atomic_load_xor_global_32 >; defm BUFFER_ATOMIC_INC : MUBUF_Pseudo_Atomics < - "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global + "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global_32 >; defm BUFFER_ATOMIC_DEC : MUBUF_Pseudo_Atomics < - "buffer_atomic_dec", VGPR_32, i32, atomic_dec_global + "buffer_atomic_dec", VGPR_32, i32, atomic_dec_global_32 >; defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_swap_x2", VReg_64, i64, atomic_swap_global + "buffer_atomic_swap_x2", VReg_64, i64, atomic_swap_global_64 >; defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Pseudo_Atomics < "buffer_atomic_cmpswap_x2", VReg_128, v2i64, null_frag >; defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_add_x2", VReg_64, i64, atomic_add_global + "buffer_atomic_add_x2", VReg_64, i64, atomic_load_add_global_64 >; defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_sub_x2", VReg_64, i64, atomic_sub_global + "buffer_atomic_sub_x2", VReg_64, i64, atomic_load_sub_global_64 >; defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_smin_x2", VReg_64, i64, atomic_min_global + "buffer_atomic_smin_x2", VReg_64, i64, atomic_load_min_global_64 >; defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_umin_x2", VReg_64, i64, atomic_umin_global + "buffer_atomic_umin_x2", VReg_64, i64, atomic_load_umin_global_64 >; defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_smax_x2", VReg_64, i64, atomic_max_global + "buffer_atomic_smax_x2", VReg_64, i64, atomic_load_max_global_64 >; defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_umax_x2", VReg_64, i64, atomic_umax_global + "buffer_atomic_umax_x2", VReg_64, i64, atomic_load_umax_global_64 >; defm BUFFER_ATOMIC_AND_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_and_x2", VReg_64, i64, atomic_and_global + "buffer_atomic_and_x2", VReg_64, i64, atomic_load_and_global_64 >; defm BUFFER_ATOMIC_OR_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_or_x2", VReg_64, i64, atomic_or_global + "buffer_atomic_or_x2", VReg_64, i64, atomic_load_or_global_64 >; defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_xor_x2", VReg_64, i64, atomic_xor_global + "buffer_atomic_xor_x2", VReg_64, i64, atomic_load_xor_global_64 >; defm BUFFER_ATOMIC_INC_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global + "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global_64 >; defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global + "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global_64 >; let SubtargetPredicate = isGFX8GFX9 in { diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index 67e78414f01..e3210301656 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -750,15 +750,15 @@ class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GC multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> { let OtherPredicates = [LDSRequiresM0Init] in { - def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_m0")>; + def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt.Size)>; } let OtherPredicates = [NotLDSRequiresM0Init] in { def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, - !cast<PatFrag>(frag#"_local")>; + !cast<PatFrag>(frag#"_local_"#vt.Size)>; } - def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0"), 1>; + def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size), 1>; } @@ -770,15 +770,15 @@ class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : G multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, ValueType vt, string frag> { let OtherPredicates = [LDSRequiresM0Init] in { - def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_local_m0")>; + def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt.Size)>; } let OtherPredicates = [NotLDSRequiresM0Init] in { def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, - !cast<PatFrag>(frag#"_local")>; + !cast<PatFrag>(frag#"_local_"#vt.Size)>; } - def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0"), 1>; + def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size), 1>; } diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td index 0550092ce1d..792e26d21f9 100644 --- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td @@ -322,46 +322,46 @@ def : EGOrCaymanPat<(i32 (atomic_cmp_swap_global_noret i32:$ptr, i32:$cmp, i32:$ defm AtomicSwapPat : AtomicPat <RAT_ATOMIC_XCHG_INT_RTN, RAT_ATOMIC_XCHG_INT_NORET, - atomic_swap_global_ret, - atomic_swap_global_noret>; + atomic_swap_global_ret_32, + atomic_swap_global_noret_32>; defm AtomicAddPat : AtomicPat <RAT_ATOMIC_ADD_RTN, RAT_ATOMIC_ADD_NORET, - atomic_add_global_ret, atomic_add_global_noret>; + atomic_load_add_global_ret_32, atomic_load_add_global_noret_32>; defm AtomicSubPat : AtomicPat <RAT_ATOMIC_SUB_RTN, RAT_ATOMIC_SUB_NORET, - atomic_sub_global_ret, atomic_sub_global_noret>; + atomic_load_sub_global_ret_32, atomic_load_sub_global_noret_32>; defm AtomicMinPat : AtomicPat <RAT_ATOMIC_MIN_INT_RTN, RAT_ATOMIC_MIN_INT_NORET, - atomic_min_global_ret, atomic_min_global_noret>; + atomic_load_min_global_ret_32, atomic_load_min_global_noret_32>; defm AtomicUMinPat : AtomicPat <RAT_ATOMIC_MIN_UINT_RTN, RAT_ATOMIC_MIN_UINT_NORET, - atomic_umin_global_ret, atomic_umin_global_noret>; + atomic_load_umin_global_ret_32, atomic_load_umin_global_noret_32>; defm AtomicMaxPat : AtomicPat <RAT_ATOMIC_MAX_INT_RTN, RAT_ATOMIC_MAX_INT_NORET, - atomic_max_global_ret, atomic_max_global_noret>; + atomic_load_max_global_ret_32, atomic_load_max_global_noret_32>; defm AtomicUMaxPat : AtomicPat <RAT_ATOMIC_MAX_UINT_RTN, RAT_ATOMIC_MAX_UINT_NORET, - atomic_umax_global_ret, atomic_umax_global_noret>; + atomic_load_umax_global_ret_32, atomic_load_umax_global_noret_32>; defm AtomicAndPat : AtomicPat <RAT_ATOMIC_AND_RTN, RAT_ATOMIC_AND_NORET, - atomic_and_global_ret, atomic_and_global_noret>; + atomic_load_and_global_ret_32, atomic_load_and_global_noret_32>; defm AtomicOrPat : AtomicPat <RAT_ATOMIC_OR_RTN, RAT_ATOMIC_OR_NORET, - atomic_or_global_ret, atomic_or_global_noret>; + atomic_load_or_global_ret_32, atomic_load_or_global_noret_32>; defm AtomicXorPat : AtomicPat <RAT_ATOMIC_XOR_RTN, RAT_ATOMIC_XOR_NORET, - atomic_xor_global_ret, atomic_xor_global_noret>; + atomic_load_xor_global_ret_32, atomic_load_xor_global_noret_32>; defm AtomicIncAddPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_RTN, RAT_ATOMIC_INC_UINT_NORET, - atomic_add_global_ret, - atomic_add_global_noret, 1>; + atomic_load_add_global_ret_32, + atomic_load_add_global_noret_32, 1>; defm AtomicIncSubPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_RTN, RAT_ATOMIC_INC_UINT_NORET, - atomic_sub_global_ret, - atomic_sub_global_noret, -1>; + atomic_load_sub_global_ret_32, + atomic_load_sub_global_noret_32, -1>; defm AtomicDecAddPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_RTN, RAT_ATOMIC_DEC_UINT_NORET, - atomic_add_global_ret, - atomic_add_global_noret, -1>; + atomic_load_add_global_ret_32, + atomic_load_add_global_noret_32, -1>; defm AtomicDecSubPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_RTN, RAT_ATOMIC_DEC_UINT_NORET, - atomic_sub_global_ret, - atomic_sub_global_noret, 1>; + atomic_load_sub_global_ret_32, + atomic_load_sub_global_noret_32, 1>; // Should be predicated on FeatureFP64 // def FMA_64 : R600_3OP < @@ -628,37 +628,37 @@ def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE", [(truncstorei16_local i32:$src1, i32:$src0)] >; def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD", - [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_add_local_32 i32:$src0, i32:$src1))] >; def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB", - [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_sub_local_32 i32:$src0, i32:$src1))] >; def LDS_AND_RET : R600_LDS_1A1D_RET <0x29, "LDS_AND", - [(set i32:$dst, (atomic_load_and_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_and_local_32 i32:$src0, i32:$src1))] >; def LDS_OR_RET : R600_LDS_1A1D_RET <0x2a, "LDS_OR", - [(set i32:$dst, (atomic_load_or_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_or_local_32 i32:$src0, i32:$src1))] >; def LDS_XOR_RET : R600_LDS_1A1D_RET <0x2b, "LDS_XOR", - [(set i32:$dst, (atomic_load_xor_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_xor_local_32 i32:$src0, i32:$src1))] >; def LDS_MIN_INT_RET : R600_LDS_1A1D_RET <0x25, "LDS_MIN_INT", - [(set i32:$dst, (atomic_load_min_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_min_local_32 i32:$src0, i32:$src1))] >; def LDS_MAX_INT_RET : R600_LDS_1A1D_RET <0x26, "LDS_MAX_INT", - [(set i32:$dst, (atomic_load_max_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_max_local_32 i32:$src0, i32:$src1))] >; def LDS_MIN_UINT_RET : R600_LDS_1A1D_RET <0x27, "LDS_MIN_UINT", - [(set i32:$dst, (atomic_load_umin_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_umin_local_32 i32:$src0, i32:$src1))] >; def LDS_MAX_UINT_RET : R600_LDS_1A1D_RET <0x28, "LDS_MAX_UINT", - [(set i32:$dst, (atomic_load_umax_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_load_umax_local_32 i32:$src0, i32:$src1))] >; def LDS_WRXCHG_RET : R600_LDS_1A1D_RET <0x2d, "LDS_WRXCHG", - [(set i32:$dst, (atomic_swap_local i32:$src0, i32:$src1))] + [(set i32:$dst, (atomic_swap_local_32 i32:$src0, i32:$src1))] >; def LDS_CMPST_RET : R600_LDS_1A2D_RET <0x30, "LDS_CMPST", - [(set i32:$dst, (atomic_cmp_swap_local i32:$src0, i32:$src1, i32:$src2))] + [(set i32:$dst, (atomic_cmp_swap_local_32 i32:$src0, i32:$src1, i32:$src2))] >; def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", [(set (i32 R600_Reg32:$dst), (load_local R600_Reg32:$src0))] diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 617aa330e83..a00ff76be7a 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -564,76 +564,76 @@ defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswa v2i64, VReg_128>; defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap", - VGPR_32, i32, atomic_swap_global>; + VGPR_32, i32, atomic_swap_global_32>; defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2", - VReg_64, i64, atomic_swap_global>; + VReg_64, i64, atomic_swap_global_64>; defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add", - VGPR_32, i32, atomic_add_global>; + VGPR_32, i32, atomic_load_add_global_32>; defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub", - VGPR_32, i32, atomic_sub_global>; + VGPR_32, i32, atomic_load_sub_global_32>; defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin", - VGPR_32, i32, atomic_min_global>; + VGPR_32, i32, atomic_load_min_global_32>; defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin", - VGPR_32, i32, atomic_umin_global>; + VGPR_32, i32, atomic_load_umin_global_32>; defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax", - VGPR_32, i32, atomic_max_global>; + VGPR_32, i32, atomic_load_max_global_32>; defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax", - VGPR_32, i32, atomic_umax_global>; + VGPR_32, i32, atomic_load_umax_global_32>; defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and", - VGPR_32, i32, atomic_and_global>; + VGPR_32, i32, atomic_load_and_global_32>; defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or", - VGPR_32, i32, atomic_or_global>; + VGPR_32, i32, atomic_load_or_global_32>; defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor", - VGPR_32, i32, atomic_xor_global>; + VGPR_32, i32, atomic_load_xor_global_32>; defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc", - VGPR_32, i32, atomic_inc_global>; + VGPR_32, i32, atomic_inc_global_32>; defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec", - VGPR_32, i32, atomic_dec_global>; + VGPR_32, i32, atomic_dec_global_32>; defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2", - VReg_64, i64, atomic_add_global>; + VReg_64, i64, atomic_load_add_global_64>; defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2", - VReg_64, i64, atomic_sub_global>; + VReg_64, i64, atomic_load_sub_global_64>; defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2", - VReg_64, i64, atomic_min_global>; + VReg_64, i64, atomic_load_min_global_64>; defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2", - VReg_64, i64, atomic_umin_global>; + VReg_64, i64, atomic_load_umin_global_64>; defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2", - VReg_64, i64, atomic_max_global>; + VReg_64, i64, atomic_load_max_global_64>; defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2", - VReg_64, i64, atomic_umax_global>; + VReg_64, i64, atomic_load_umax_global_64>; defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2", - VReg_64, i64, atomic_and_global>; + VReg_64, i64, atomic_load_and_global_64>; defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2", - VReg_64, i64, atomic_or_global>; + VReg_64, i64, atomic_load_or_global_64>; defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2", - VReg_64, i64, atomic_xor_global>; + VReg_64, i64, atomic_load_xor_global_64>; defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2", - VReg_64, i64, atomic_inc_global>; + VReg_64, i64, atomic_inc_global_64>; defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2", - VReg_64, i64, atomic_dec_global>; + VReg_64, i64, atomic_dec_global_64>; } // End is_flat_global = 1 } // End SubtargetPredicate = HasFlatGlobalInsts @@ -795,33 +795,33 @@ def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32, VReg_128>; def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>; def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64, VReg_64>; -def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>; -def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>; -def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global, i32>; -def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global, i32>; -def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>; -def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>; -def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>; -def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>; -def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>; -def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>; -def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>; +def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>; +def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>; +def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global_32, i32>; +def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>; +def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>; +def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>; +def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>; +def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>; +def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>; +def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>; +def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>; def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>; -def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>; - -def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_add_global, i64>; -def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_sub_global, i64>; -def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>; -def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>; -def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_and_global, i64>; -def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_max_global, i64>; -def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>; -def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_min_global, i64>; -def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>; -def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_or_global, i64>; -def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>; + +def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>; def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>; -def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>; +def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>; def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>; def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>; @@ -899,33 +899,33 @@ def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>; def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, store_atomic_global, i32>; def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, store_atomic_global, i64, VReg_64>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_add_global, i32>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_sub_global, i32>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_RTN, atomic_inc_global, i32>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_RTN, atomic_dec_global, i32>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_RTN, atomic_and_global, i32>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_RTN, atomic_max_global, i32>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_RTN, atomic_umax_global, i32>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_min_global, i32>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_umin_global, i32>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_or_global, i32>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global, i32>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_RTN, atomic_inc_global_32, i32>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>; def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_xor_global, i32>; - -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_add_global, i64>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN, atomic_sub_global, i64>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_X2_RTN, atomic_and_global, i64>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_X2_RTN, atomic_max_global, i64>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, atomic_min_global, i64>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_or_global, i64>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>; + +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>; def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>; -def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>; +def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>; def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>; def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global_noret, v2f16>; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 18ec4b487b8..7e174dc75d4 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -300,19 +300,37 @@ class isPackedType<ValueType SrcVT> { // PatFrags for global memory operations //===----------------------------------------------------------------------===// -defm atomic_inc_global : global_binary_atomic_op<SIatomic_inc>; -defm atomic_dec_global : global_binary_atomic_op<SIatomic_dec>; +foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in { +let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in { -def atomic_inc_local : local_binary_atomic_op<SIatomic_inc>; -def atomic_dec_local : local_binary_atomic_op<SIatomic_dec>; -def atomic_load_fadd_local : local_binary_atomic_op<atomic_load_fadd>; -def atomic_load_fmin_local : local_binary_atomic_op<SIatomic_fmin>; -def atomic_load_fmax_local : local_binary_atomic_op<SIatomic_fmax>; -def atomic_fadd_global_noret : global_binary_atomic_op_frag<SIglobal_atomic_fadd>; -def atomic_pk_fadd_global_noret : global_binary_atomic_op_frag<SIglobal_atomic_pk_fadd>; +defm atomic_inc_#as : binary_atomic_op<SIatomic_inc>; +defm atomic_dec_#as : binary_atomic_op<SIatomic_dec>; +defm atomic_load_fmin_#as : binary_atomic_op<SIatomic_fmin, 0>; +defm atomic_load_fmax_#as : binary_atomic_op<SIatomic_fmax, 0>; +} // End let AddressSpaces = ... +} // End foreach AddrSpace + +def atomic_fadd_global_noret : PatFrag< + (ops node:$ptr, node:$value), + (SIglobal_atomic_fadd node:$ptr, node:$value)> { + // FIXME: Move this + let MemoryVT = f32; + let IsAtomic = 1; + let AddressSpaces = StoreAddress_global.AddrSpaces; +} + +def atomic_pk_fadd_global_noret : PatFrag< + (ops node:$ptr, node:$value), + (SIglobal_atomic_pk_fadd node:$ptr, node:$value)> { + // FIXME: Move this + let MemoryVT = v2f16; + let IsAtomic = 1; + let AddressSpaces = StoreAddress_global.AddrSpaces; +} + //===----------------------------------------------------------------------===// // SDNodes PatFrags for loads/stores with a glue input. // This is for SDNodes and PatFrag for local loads and stores to @@ -584,15 +602,21 @@ def lshl_rev : PatFrag < >; multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0, - SDTypeProfile tc = SDTAtomic2> { + SDTypeProfile tc = SDTAtomic2, + bit IsInt = 1> { def _glue : SDNode < !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue] >; - def _local_m0 : local_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>; - def _region_m0 : region_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>; + let AddressSpaces = StoreAddress_local.AddrSpaces in { + defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>; + } + + let AddressSpaces = StoreAddress_region.AddrSpaces in { + defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>; + } } defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">; @@ -607,16 +631,19 @@ defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">; defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">; defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">; defm atomic_swap : SIAtomicM0Glue2 <"SWAP">; -defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32>; -defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32>; -defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32>; +defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>; +defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>; +defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>; def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue] >; -def atomic_cmp_swap_local_m0 : AtomicCmpSwapLocal<atomic_cmp_swap_glue>; -def atomic_cmp_swap_region_m0 : AtomicCmpSwapRegion<atomic_cmp_swap_glue>; +// FIXME: +def atomic_cmp_swap_local_m0_32 : AtomicCmpSwapLocal<atomic_cmp_swap_glue>; +def atomic_cmp_swap_region_m0_32 : AtomicCmpSwapRegion<atomic_cmp_swap_glue>; +def atomic_cmp_swap_local_m0_64 : AtomicCmpSwapLocal<atomic_cmp_swap_glue>; +def atomic_cmp_swap_region_m0_64 : AtomicCmpSwapRegion<atomic_cmp_swap_glue>; def as_i1imm : SDNodeXForm<imm, [{ |