diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-09-05 16:24:58 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-09-05 16:24:58 +0000 |
| commit | 8ae596106592f8881a16185f3c56b58c024f2f9c (patch) | |
| tree | 755c41aa181ee297cb410c1d21dc35fb1abe4c27 | |
| parent | 1fcea42e674bdb912701e14dfd4948c99a7d159e (diff) | |
| download | bcm5719-llvm-8ae596106592f8881a16185f3c56b58c024f2f9c.tar.gz bcm5719-llvm-8ae596106592f8881a16185f3c56b58c024f2f9c.zip | |
R600/SI: Use same complex patterns for DS atomics
This fixes hitting the same negative base offset problem
that was already fixed for regular loads and stores.
llvm-svn: 217256
| -rw-r--r-- | llvm/lib/Target/R600/SIInstructions.td | 114 | ||||
| -rw-r--r-- | llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll | 15 | ||||
| -rw-r--r-- | llvm/test/CodeGen/R600/local-atomics.ll | 29 |
3 files changed, 90 insertions, 68 deletions
```diff
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td
index 56de9db372a..be4c0eab44d 100644
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -2572,17 +2572,10 @@ def : Pat <
                    (EXTRACT_SUBREG $value, sub1), $offset0, $offset1)
 >;
 
-multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
-  def : Pat <
-    (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$value),
-    (inst (i1 0), $ptr, $value, (as_i16imm $offset))
-  >;
-
-  def : Pat <
-    (frag i32:$ptr, vt:$val),
-    (inst 0, $ptr, $val, 0)
-  >;
-}
+class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
+  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
+  (inst (i1 0), $ptr, $value, (as_i16imm $offset))
+>;
 
 // Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
 //
@@ -2594,69 +2587,56 @@ multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
 // We also load this -1 with s_mov_b32 / s_mov_b64 even though this
 // needs to be a VGPR. The SGPR copy pass will fix this, and it's
 // easier since there is no v_mov_b64.
-multiclass DSAtomicIncRetPat<DS inst, ValueType vt,
-                             Instruction LoadImm, PatFrag frag> {
-  def : Pat <
-    (frag (add i32:$ptr, (i32 IMM16bit:$offset)), (vt 1)),
-    (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
-  >;
-
-  def : Pat <
-    (frag i32:$ptr, (vt 1)),
-    (inst 0, $ptr, (LoadImm (vt -1)), 0)
-  >;
-}
+class DSAtomicIncRetPat<DS inst, ValueType vt,
+                        Instruction LoadImm, PatFrag frag> : Pat <
+  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
+  (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
+>;
 
-multiclass DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> {
-  def : Pat <
-    (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$cmp, vt:$swap),
-    (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
-  >;
-  def : Pat <
-    (frag i32:$ptr, vt:$cmp, vt:$swap),
-    (inst 0, $ptr, $cmp, $swap, 0)
-  >;
-}
+class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
+  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
+  (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
+>;
 
 
 // 32-bit atomics.
-defm : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
-                         S_MOV_B32, atomic_load_add_local>;
-defm : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
-                         S_MOV_B32, atomic_load_sub_local>;
-
-defm : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
-defm : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
-defm : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
-defm : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
-defm : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
-defm : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
-
-defm : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
+def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
+                        S_MOV_B32, atomic_load_add_local>;
+def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
+                        S_MOV_B32, atomic_load_sub_local>;
+
+def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
+
+def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
 
 
 // 64-bit atomics.
-defm : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
-                         S_MOV_B64, atomic_load_add_local>;
-defm : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
-                         S_MOV_B64, atomic_load_sub_local>;
-
-defm : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
-defm : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
-defm : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
-defm : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
-defm : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
-defm : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
-defm : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
-defm : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
-
-defm : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
+def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
+                        S_MOV_B64, atomic_load_add_local>;
+def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
+                        S_MOV_B64, atomic_load_sub_local>;
+
+def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
+
+def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
 
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll b/llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll
index b04874bfcf1..d07fde3934a 100644
--- a/llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll
+++ b/llvm/test/CodeGen/R600/atomic_cmp_swap_local.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 
 ; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_offset:
 ; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
@@ -35,3 +36,17 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
+
+; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_bad_si_offset
+; SI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
+; CI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
+  %sub = sub i32 %a, %b
+  %add = add i32 %sub, 4
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+  %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
+  %result = extractvalue { i32, i1 } %pair, 0
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/R600/local-atomics.ll b/llvm/test/CodeGen/R600/local-atomics.ll
index 5a44951055e..043269f4017 100644
--- a/llvm/test/CodeGen/R600/local-atomics.ll
+++ b/llvm/test/CodeGen/R600/local-atomics.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 
 ; FUNC-LABEL: @lds_atomic_xchg_ret_i32:
 ; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
@@ -47,6 +48,19 @@ define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
   ret void
 }
 
+; FUNC-LABEL: @lds_atomic_add_ret_i32_bad_si_offset
+; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
+; CI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+  %sub = sub i32 %a, %b
+  %add = add i32 %sub, 4
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
 ; FUNC-LABEL: @lds_atomic_inc_ret_i32:
 ; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
 ; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
@@ -70,6 +84,19 @@ define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
   ret void
 }
 
+; FUNC-LABEL: @lds_atomic_inc_ret_i32_bad_si_offset:
+; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
+; CI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+  %sub = sub i32 %a, %b
+  %add = add i32 %sub, 4
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
 ; FUNC-LABEL: @lds_atomic_sub_ret_i32:
 ; SI: DS_SUB_RTN_U32
 ; SI: S_ENDPGM
```

