summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir374
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir21
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir96
3 files changed, 454 insertions, 37 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
new file mode 100644
index 00000000000..517e84fa58c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir
@@ -0,0 +1,374 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
+
+---
+name: amdgpu_atomic_cmpxchg_s32_flat
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+
+ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat
+ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ %0:vgpr(p0) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s32) = COPY $vgpr3
+ %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
+ %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 4, addrspace 0)
+ $vgpr0 = COPY %4
+
+...
+
+---
+name: amdgpu_atomic_cmpxchg_s32_flat_gep4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+
+ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
+ ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+ ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4
+ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
+ ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+ ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ %0:vgpr(p0) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s32) = COPY $vgpr3
+ %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
+ %4:vgpr(s64) = G_CONSTANT i64 4
+ %5:vgpr(p0) = G_GEP %0, %4
+ %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst 4, addrspace 0)
+ $vgpr0 = COPY %6
+
+...
+
+---
+name: amdgpu_atomic_cmpxchg_s64_flat
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+
+ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
+ ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
+ ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat
+ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
+ %0:vgpr(p0) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = COPY $vgpr2_vgpr3
+ %2:vgpr(s64) = COPY $vgpr4_vgpr5
+ %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2
+ %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 8, addrspace 0)
+ $vgpr0_vgpr1 = COPY %4
+
+...
+
+---
+name: amdgpu_atomic_cmpxchg_s64_flat_gep4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+
+ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
+ ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+ ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
+ ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
+ ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4
+ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
+ ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+ ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
+ %0:vgpr(p0) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = COPY $vgpr2_vgpr3
+ %2:vgpr(s64) = COPY $vgpr4_vgpr5
+ %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2
+ %4:vgpr(s64) = G_CONSTANT i64 4
+ %5:vgpr(p0) = G_GEP %0, %4
+ %6:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst 8, addrspace 0)
+ $vgpr0_vgpr1 = COPY %6
+
+...
+
+---
+name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+
+ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec
+ ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+ ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec
+ ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+ ; GFX9: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
+ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec
+ ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+ ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+ ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+ %0:vgpr(p0) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s32) = COPY $vgpr3
+ %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
+ %4:vgpr(s64) = G_CONSTANT i64 -4
+ %5:vgpr(p0) = G_GEP %0, %4
+ %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst 4, addrspace 0)
+ $vgpr0 = COPY %6
+
+...
+
+---
+name: amdgpu_atomic_cmpxchg_s32_flat_nortn
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+
+ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn
+ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+ %0:vgpr(p0) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s32) = COPY $vgpr3
+ %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
+ %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 4, addrspace 0)
+
+...
+
+---
+name: amdgpu_atomic_cmpxchg_s64_flat_nortn
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+
+ ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn
+ ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+ ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+ ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+ %0:vgpr(p0) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = COPY $vgpr2_vgpr3
+ %2:vgpr(s64) = COPY $vgpr4_vgpr5
+ %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2
+ %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 8, addrspace 0)
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir
index b5fe7334e06..b2526ca975b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir
@@ -11,9 +11,10 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p1), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic 4, addrspace 1)
- ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[COPY1]]
- ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+ ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic 4, addrspace 1)
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]]
+ ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s32) = COPY $vgpr3
@@ -32,9 +33,10 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic 4)
- ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[COPY1]]
- ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+ ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic 4)
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]]
+ ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s32) = COPY $vgpr3
@@ -74,9 +76,10 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
- ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p1), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic 8, addrspace 1)
- ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s64), [[COPY1]]
- ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY1]](s64)
+ ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic 8, addrspace 1)
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s64), [[COPY1]]
+ ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(s64) = COPY $vgpr4_vgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir
index 738106de9f7..2cadc7afa9b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg.mir
@@ -3,37 +3,55 @@
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s
---
-name: atomic_cmpxchg_global_i32
+name: atomic_cmpxchg_local_i32
body: |
bb.0:
- liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3
- ; CHECK-LABEL: name: atomic_cmpxchg_global_i32
- ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
- ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p1), [[COPY1]], [[COPY2]] :: (load store seq_cst 4, addrspace 1)
- %0:_(p1) = COPY $sgpr0_sgpr1
- %1:_(s32) = COPY $sgpr2
- %2:_(s32) = COPY $sgpr3
- %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 1)
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: atomic_cmpxchg_local_i32
+ ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst 4, addrspace 3)
+ %0:_(p3) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 3)
...
---
-name: atomic_cmpxchg_local_i32
+name: atomic_cmpxchg_local_i64
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2
- ; CHECK-LABEL: name: atomic_cmpxchg_local_i32
+ ; CHECK-LABEL: name: atomic_cmpxchg_local_i64
; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
- ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst 4, addrspace 3)
+ ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst 8, addrspace 3)
%0:_(p3) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
- %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 3)
+ %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 8, addrspace 3)
+...
+
+---
+name: atomic_cmpxchg_global_i32
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3
+ ; CHECK-LABEL: name: atomic_cmpxchg_global_i32
+ ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+ ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst 4, addrspace 1)
+ %0:_(p1) = COPY $sgpr0_sgpr1
+ %1:_(s32) = COPY $sgpr2
+ %2:_(s32) = COPY $sgpr3
+ %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 1)
...
---
@@ -46,26 +64,48 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
- ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p1), [[COPY1]], [[COPY2]] :: (load store seq_cst 4, addrspace 1)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+ ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst 8, addrspace 1)
%0:_(p1) = COPY $sgpr0_sgpr1
%1:_(s32) = COPY $sgpr2
%2:_(s32) = COPY $sgpr3
- %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 1)
+ %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 8, addrspace 1)
...
---
-name: atomic_cmpxchg_local_i64
+name: atomic_cmpxchg_flat_i32
body: |
bb.0:
- liveins: $sgpr0, $sgpr1, $sgpr2
- ; CHECK-LABEL: name: atomic_cmpxchg_local_i64
- ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $sgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
- ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store seq_cst 4, addrspace 3)
- %0:_(p3) = COPY $sgpr0
- %1:_(s32) = COPY $sgpr1
- %2:_(s32) = COPY $sgpr2
- %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 3)
+ liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3
+
+ ; CHECK-LABEL: name: atomic_cmpxchg_flat_i32
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $sgpr0_sgpr1
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+ ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store seq_cst 4)
+ %0:_(p0) = COPY $sgpr0_sgpr1
+ %1:_(s32) = COPY $sgpr2
+ %2:_(s32) = COPY $sgpr3
+ %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 4, addrspace 0)
+...
+
+---
+name: atomic_cmpxchg_flat_i64
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2, $sgpr3
+
+ ; CHECK-LABEL: name: atomic_cmpxchg_flat_i64
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $sgpr0_sgpr1
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+ ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store seq_cst 8)
+ %0:_(p0) = COPY $sgpr0_sgpr1
+ %1:_(s32) = COPY $sgpr2
+ %2:_(s32) = COPY $sgpr3
+ %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store seq_cst 8, addrspace 0)
...
OpenPOWER on IntegriCloud