| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-06-09 23:42:54 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-06-09 23:42:54 +0000 |
| commit | 7757c59e481c8cf22223ceec3f49a136afb1d6ad | |
| tree | c7c3d8dfa0bbe283625e79df8af95f4e723137f5 /llvm/lib/Target | |
| parent | 887018179ac327785b75521f6baf160595b0d8ac | |
AMDGPU: Fix flat atomics
Flat atomic instructions could already be selected, but only
when flat instructions were being used for global memory accesses.
Add selection patterns for genuinely flat addresses.
llvm-svn: 272345
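To illustrate, here is a minimal sketch (not from the patch; the function name is hypothetical) of the kind of IR these patterns now select directly. It assumes the flat/generic address space is numbered 4, as it was in the AMDGPU backend at the time of this commit:

```llvm
; Hypothetical input: an atomic RMW through a flat (generic) pointer.
; Previously only AMDGPUAS::GLOBAL_ADDRESS atomics had selection
; patterns, so this matched only when flat instructions stood in for
; global ones; with atomic_add_flat and friends it now maps to
; flat_atomic_add directly.
define i32 @flat_atomic_add_example(i32 addrspace(4)* %ptr, i32 %val) {
  %old = atomicrmw add i32 addrspace(4)* %ptr, i32 %val seq_cst
  ret i32 %old
}
```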
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 17 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 19 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/CIInstructions.td | 71 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 24 |
4 files changed, 90 insertions, 41 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 6143631856b..4c8aa4b0c2a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -138,6 +138,10 @@ private:
                                    SDValue &ImmOffset) const;
   bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                    SDValue &ImmOffset, SDValue &VOffset) const;
+
+  bool SelectFlat(SDValue Addr, SDValue &VAddr,
+                  SDValue &SLC, SDValue &TFE) const;
+
   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                         bool &Imm) const;
   bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
@@ -1236,6 +1240,15 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
   return true;
 }
 
+bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
+                                    SDValue &VAddr,
+                                    SDValue &SLC,
+                                    SDValue &TFE) const {
+  VAddr = Addr;
+  TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
+  return true;
+}
+
 ///
 /// \param EncodedOffset This is the immediate value that will be encoded
 /// directly into the instruction. On SI/CI the \p EncodedOffset
@@ -1500,6 +1513,10 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
   MemSDNode *Mem = cast<MemSDNode>(N);
   unsigned AS = Mem->getAddressSpace();
+  if (AS == AMDGPUAS::FLAT_ADDRESS) {
+    SelectCode(N);
+    return;
+  }
 
   MVT VT = N->getSimpleValueType(0);
   bool Is32 = (VT == MVT::i32);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 2874a55bef3..4b6fc47cdb7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -394,6 +394,12 @@ class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
   [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
 >;
 
+class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
+  (ops node:$ptr, node:$value),
+  (atomic_op node:$ptr, node:$value),
+  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
+>;
+
 def atomic_swap_global : global_binary_atomic_op<atomic_swap>;
 def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
 def atomic_and_global : global_binary_atomic_op<atomic_load_and>;
@@ -412,6 +418,19 @@ def atomic_cmp_swap_global_nortn : PatFrag<
   [{ return SDValue(N, 0).use_empty(); }]
 >;
 
+def atomic_swap_flat : flat_binary_atomic_op<atomic_swap>;
+def atomic_add_flat : flat_binary_atomic_op<atomic_load_add>;
+def atomic_and_flat : flat_binary_atomic_op<atomic_load_and>;
+def atomic_max_flat : flat_binary_atomic_op<atomic_load_max>;
+def atomic_min_flat : flat_binary_atomic_op<atomic_load_min>;
+def atomic_or_flat : flat_binary_atomic_op<atomic_load_or>;
+def atomic_sub_flat : flat_binary_atomic_op<atomic_load_sub>;
+def atomic_umax_flat : flat_binary_atomic_op<atomic_load_umax>;
+def atomic_umin_flat : flat_binary_atomic_op<atomic_load_umin>;
+def atomic_xor_flat : flat_binary_atomic_op<atomic_load_xor>;
+
+def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
+
 //===----------------------------------------------------------------------===//
 // Misc Pattern Fragments
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/CIInstructions.td b/llvm/lib/Target/AMDGPU/CIInstructions.td
index 9fb6e6039e5..f9a9f79126b 100644
--- a/llvm/lib/Target/AMDGPU/CIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/CIInstructions.td
@@ -153,82 +153,84 @@ defm FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
   flat<0x1f, 0x1e>, "flat_store_dwordx3", VReg_96
 >;
 defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC <
-  flat<0x30, 0x40>, "flat_atomic_swap", VGPR_32
+  flat<0x30, 0x40>, "flat_atomic_swap", VGPR_32, i32, atomic_swap_flat
 >;
 defm FLAT_ATOMIC_CMPSWAP : FLAT_ATOMIC <
-  flat<0x31, 0x41>, "flat_atomic_cmpswap", VGPR_32, VReg_64
+  flat<0x31, 0x41>, "flat_atomic_cmpswap", VGPR_32, i32,
+  atomic_cmp_swap_flat, v2i32, VReg_64
 >;
 defm FLAT_ATOMIC_ADD : FLAT_ATOMIC <
-  flat<0x32, 0x42>, "flat_atomic_add", VGPR_32
+  flat<0x32, 0x42>, "flat_atomic_add", VGPR_32, i32, atomic_add_flat
 >;
 defm FLAT_ATOMIC_SUB : FLAT_ATOMIC <
-  flat<0x33, 0x43>, "flat_atomic_sub", VGPR_32
+  flat<0x33, 0x43>, "flat_atomic_sub", VGPR_32, i32, atomic_sub_flat
 >;
 defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC <
-  flat<0x35, 0x44>, "flat_atomic_smin", VGPR_32
+  flat<0x35, 0x44>, "flat_atomic_smin", VGPR_32, i32, atomic_min_flat
 >;
 defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC <
-  flat<0x36, 0x45>, "flat_atomic_umin", VGPR_32
+  flat<0x36, 0x45>, "flat_atomic_umin", VGPR_32, i32, atomic_umin_flat
 >;
 defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC <
-  flat<0x37, 0x46>, "flat_atomic_smax", VGPR_32
+  flat<0x37, 0x46>, "flat_atomic_smax", VGPR_32, i32, atomic_max_flat
 >;
 defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC <
-  flat<0x38, 0x47>, "flat_atomic_umax", VGPR_32
+  flat<0x38, 0x47>, "flat_atomic_umax", VGPR_32, i32, atomic_umax_flat
 >;
 defm FLAT_ATOMIC_AND : FLAT_ATOMIC <
-  flat<0x39, 0x48>, "flat_atomic_and", VGPR_32
+  flat<0x39, 0x48>, "flat_atomic_and", VGPR_32, i32, atomic_and_flat
 >;
 defm FLAT_ATOMIC_OR : FLAT_ATOMIC <
-  flat<0x3a, 0x49>, "flat_atomic_or", VGPR_32
+  flat<0x3a, 0x49>, "flat_atomic_or", VGPR_32, i32, atomic_or_flat
 >;
 defm FLAT_ATOMIC_XOR : FLAT_ATOMIC <
-  flat<0x3b, 0x4a>, "flat_atomic_xor", VGPR_32
+  flat<0x3b, 0x4a>, "flat_atomic_xor", VGPR_32, i32, atomic_xor_flat
 >;
 defm FLAT_ATOMIC_INC : FLAT_ATOMIC <
-  flat<0x3c, 0x4b>, "flat_atomic_inc", VGPR_32
+  flat<0x3c, 0x4b>, "flat_atomic_inc", VGPR_32, i32, atomic_inc_flat
 >;
 defm FLAT_ATOMIC_DEC : FLAT_ATOMIC <
-  flat<0x3d, 0x4c>, "flat_atomic_dec", VGPR_32
+  flat<0x3d, 0x4c>, "flat_atomic_dec", VGPR_32, i32, atomic_dec_flat
 >;
 defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC <
-  flat<0x50, 0x60>, "flat_atomic_swap_x2", VReg_64
+  flat<0x50, 0x60>, "flat_atomic_swap_x2", VReg_64, i64, atomic_swap_flat
 >;
 defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_ATOMIC <
-  flat<0x51, 0x61>, "flat_atomic_cmpswap_x2", VReg_64, VReg_128
+  flat<0x51, 0x61>, "flat_atomic_cmpswap_x2", VReg_64, i64,
+  atomic_cmp_swap_flat, v2i64, VReg_128
 >;
 defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC <
-  flat<0x52, 0x62>, "flat_atomic_add_x2", VReg_64
+  flat<0x52, 0x62>, "flat_atomic_add_x2", VReg_64, i64, atomic_add_flat
 >;
 defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC <
-  flat<0x53, 0x63>, "flat_atomic_sub_x2", VReg_64
+  flat<0x53, 0x63>, "flat_atomic_sub_x2", VReg_64, i64, atomic_sub_flat
 >;
 defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC <
-  flat<0x55, 0x64>, "flat_atomic_smin_x2", VReg_64
+  flat<0x55, 0x64>, "flat_atomic_smin_x2", VReg_64, i64, atomic_min_flat
 >;
 defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC <
-  flat<0x56, 0x65>, "flat_atomic_umin_x2", VReg_64
+  flat<0x56, 0x65>, "flat_atomic_umin_x2", VReg_64, i64, atomic_umin_flat
 >;
 defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC <
-  flat<0x57, 0x66>, "flat_atomic_smax_x2", VReg_64
+  flat<0x57, 0x66>, "flat_atomic_smax_x2", VReg_64, i64, atomic_max_flat
 >;
 defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC <
-  flat<0x58, 0x67>, "flat_atomic_umax_x2", VReg_64
+  flat<0x58, 0x67>, "flat_atomic_umax_x2", VReg_64, i64, atomic_umax_flat
 >;
 defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC <
-  flat<0x59, 0x68>, "flat_atomic_and_x2", VReg_64
+  flat<0x59, 0x68>, "flat_atomic_and_x2", VReg_64, i64, atomic_and_flat
 >;
 defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC <
-  flat<0x5a, 0x69>, "flat_atomic_or_x2", VReg_64
+  flat<0x5a, 0x69>, "flat_atomic_or_x2", VReg_64, i64, atomic_or_flat
 >;
 defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC <
-  flat<0x5b, 0x6a>, "flat_atomic_xor_x2", VReg_64
+  flat<0x5b, 0x6a>, "flat_atomic_xor_x2", VReg_64, i64, atomic_xor_flat
 >;
 defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC <
-  flat<0x5c, 0x6b>, "flat_atomic_inc_x2", VReg_64
+  flat<0x5c, 0x6b>, "flat_atomic_inc_x2", VReg_64, i64, atomic_inc_flat
 >;
 defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC <
-  flat<0x5d, 0x6c>, "flat_atomic_dec_x2", VReg_64
+  flat<0x5d, 0x6c>, "flat_atomic_dec_x2", VReg_64, i64, atomic_dec_flat
 >;
 
 } // End SubtargetPredicate = isCIVI
@@ -238,22 +240,24 @@ defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC <
 let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst,
   DisableVIDecoder = 1 in {
 
 defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC <
-  flat<0x3e>, "flat_atomic_fcmpswap", VGPR_32, VReg_64
+  flat<0x3e>, "flat_atomic_fcmpswap", VGPR_32, f32,
+  null_frag, v2f32, VReg_64
 >;
 defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC <
-  flat<0x3f>, "flat_atomic_fmin", VGPR_32
+  flat<0x3f>, "flat_atomic_fmin", VGPR_32, f32
 >;
 defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC <
-  flat<0x40>, "flat_atomic_fmax", VGPR_32
+  flat<0x40>, "flat_atomic_fmax", VGPR_32, f32
 >;
 defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC <
-  flat<0x5e>, "flat_atomic_fcmpswap_x2", VReg_64, VReg_128
+  flat<0x5e>, "flat_atomic_fcmpswap_x2", VReg_64, f64,
+  null_frag, v2f64, VReg_128
 >;
 defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC <
-  flat<0x5f>, "flat_atomic_fmin_x2", VReg_64
+  flat<0x5f>, "flat_atomic_fmin_x2", VReg_64, f64
 >;
 defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <
-  flat<0x60>, "flat_atomic_fmax_x2", VReg_64
+  flat<0x60>, "flat_atomic_fmax_x2", VReg_64, f64
 >;
 } // End SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst, DisableVIDecoder = 1
@@ -293,7 +297,8 @@ class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
 >;
 
 class FlatStoreAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
-  // atomic store follows aotmic binop convenction so the address comes first
+  // atomic store follows atomic binop convention so the address comes
+  // first.
   (node i64:$addr, vt:$data),
   (inst $addr, $data, 1, 0, 0)
 >;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index d7805fd8e37..94932a89e13 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -190,6 +190,9 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
 def atomic_inc_global : global_binary_atomic_op<SIatomic_inc>;
 def atomic_dec_global : global_binary_atomic_op<SIatomic_dec>;
 
+def atomic_inc_flat : flat_binary_atomic_op<SIatomic_inc>;
+def atomic_dec_flat : flat_binary_atomic_op<SIatomic_dec>;
+
 //===----------------------------------------------------------------------===//
 // SDNodes and PatFrag for local loads and stores to enable s_mov_b32 m0, -1
 // to be glued to the memory instructions.
@@ -567,6 +570,7 @@ def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
 def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
 def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
 def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
+def FLATAtomic : ComplexPattern<i64, 3, "SelectFlat">;
 def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
 def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
 def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
@@ -3210,32 +3214,36 @@ multiclass FLAT_Store_Helper <flat op, string asm_name,
 }
 
 multiclass FLAT_ATOMIC <flat op, string asm_name, RegisterClass vdst_rc,
+                        ValueType vt, SDPatternOperator atomic = null_frag,
+                        ValueType data_vt = vt,
                         RegisterClass data_rc = vdst_rc,
-                        dag outs_noret = (outs),
                         string asm_noret = asm_name#" $addr, $data"#"$slc"#"$tfe"> {
 
   let mayLoad = 1, mayStore = 1, glc = 0, vdst = 0 in {
-    def "" : FLAT_Pseudo <NAME, outs_noret,
+    def "" : FLAT_Pseudo <NAME, (outs),
                           (ins VReg_64:$addr, data_rc:$data,
                                slc:$slc, tfe:$tfe), []>,
              AtomicNoRet <NAME, 0>;
 
-    def _ci : FLAT_Real_ci <op.CI, NAME, outs_noret,
+    def _ci : FLAT_Real_ci <op.CI, NAME, (outs),
                             (ins VReg_64:$addr, data_rc:$data,
                                  slc:$slc, tfe:$tfe),
                             asm_noret>;
 
-    def _vi : FLAT_Real_vi <op.VI, NAME, outs_noret,
+    def _vi : FLAT_Real_vi <op.VI, NAME, (outs),
                             (ins VReg_64:$addr, data_rc:$data,
                                  slc:$slc, tfe:$tfe),
                             asm_noret>;
   }
 
   let glc = 1, hasPostISelHook = 1 in {
-    defm _RTN : FLAT_AtomicRet_m <op, (outs vdst_rc:$vdst),
-                                  (ins VReg_64:$addr, data_rc:$data, slc:$slc,
-                                       tfe:$tfe),
-                                  asm_name#" $vdst, $addr, $data glc$slc$tfe", []>;
+    defm _RTN : FLAT_AtomicRet_m <
+      op, (outs vdst_rc:$vdst),
+      (ins VReg_64:$addr, data_rc:$data, slc:$slc, tfe:$tfe),
+      asm_name#" $vdst, $addr, $data glc$slc$tfe",
+      [(set vt:$vdst,
+        (atomic (FLATAtomic i64:$addr, i1:$slc, i1:$tfe), data_vt:$data))]
+    >;
  }
 }
```
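As a reading aid, here is a rough sketch (not part of the patch, assuming ordinary TableGen pattern semantics) of what the new FLAT_ATOMIC arguments amount to once the multiclass is expanded for, say, FLAT_ATOMIC_ADD. The FLATAtomic ComplexPattern invokes the new SelectFlat, which supplies the address and zeroed slc/tfe operands, while atomic_add_flat restricts matching to AMDGPUAS::FLAT_ADDRESS:

```tablegen
// Approximate standalone equivalent of the pattern attached to the
// returning variant FLAT_ATOMIC_ADD_RTN (vt = i32, data_vt = i32,
// atomic = atomic_add_flat). An illustration, not code from the patch.
def : Pat <
  (i32 (atomic_add_flat (FLATAtomic i64:$addr, i1:$slc, i1:$tfe),
                        i32:$data)),
  (FLAT_ATOMIC_ADD_RTN $addr, $data, $slc, $tfe)
>;
```

The non-returning variants keep null_frag (no pattern) and are still produced only via AtomicNoRet bookkeeping, which is why SelectATOMIC_CMP_SWAP above simply falls back to the generated matcher for FLAT_ADDRESS.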

