diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-08-01 03:22:40 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-08-01 03:22:40 +0000 |
| commit | 70e20c0f0855f3278c476bf5d2e7ae226fa0cbdc (patch) | |
| tree | 7746d89b11d41b62f1ad96ff16aa727b1a2fc3b0 /llvm/lib | |
| parent | 3baf4d3418dadf3cbf78b98748123985b162b8a7 (diff) | |
| download | bcm5719-llvm-70e20c0f0855f3278c476bf5d2e7ae226fa0cbdc.tar.gz bcm5719-llvm-70e20c0f0855f3278c476bf5d2e7ae226fa0cbdc.zip | |
AMDGPU: Correct FP atomic patterns
These need to use an fadd, not an add. Also make the noret part clear
in the name.
llvm-svn: 367505
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/FLATInstructions.td | 11 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 4 |
3 files changed, 10 insertions, 9 deletions
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 62a19d848af..854d4c573bf 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1043,10 +1043,10 @@ def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1", let SubtargetPredicate = HasAtomicFaddInsts in { defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN < - "buffer_atomic_add_f32", VGPR_32, f32, atomic_add_global + "buffer_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret >; defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN < - "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_add_global + "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret >; } // End SubtargetPredicate = HasAtomicFaddInsts diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 889f60dae92..617aa330e83 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -686,10 +686,10 @@ let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in { let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in { defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN < - "global_atomic_add_f32", VGPR_32, f32, atomic_add_global + "global_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret >; defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN < - "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_add_global + "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret >; } // End SubtargetPredicate = HasAtomicFaddInsts @@ -847,9 +847,6 @@ def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>; } // End OtherPredicates = [HasFlatAddressSpace] -def atomic_fadd_global : global_binary_atomic_op_frag<SIglobal_atomic_fadd>; -def atomic_pk_fadd_global : global_binary_atomic_op_frag<SIglobal_atomic_pk_fadd>; - let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in { def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>; @@ -930,8 +927,8 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>; def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>; def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>; -def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global, f32>; -def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global, v2f16>; +def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>; +def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global_noret, v2f16>; } // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 18faec314c3..18ec4b487b8 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -309,6 +309,10 @@ def atomic_load_fadd_local : local_binary_atomic_op<atomic_load_fadd>; def atomic_load_fmin_local : local_binary_atomic_op<SIatomic_fmin>; def atomic_load_fmax_local : local_binary_atomic_op<SIatomic_fmax>; +def atomic_fadd_global_noret : global_binary_atomic_op_frag<SIglobal_atomic_fadd>; +def atomic_pk_fadd_global_noret : global_binary_atomic_op_frag<SIglobal_atomic_pk_fadd>; + + //===----------------------------------------------------------------------===// // SDNodes PatFrags for loads/stores with a glue input. // This is for SDNodes and PatFrag for local loads and stores to |

