diff options

| field | value | date |
|---|---|---|
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-12-27 10:17:45 -0500 |
| committer | Matt Arsenault <arsenm2@gmail.com> | 2020-01-04 12:40:35 -0500 |
| commit | 4e972224c476e05af445130e2b208e9819d220a5 (patch) | |
| tree | b4287ca6efecbd76db1a5331709bbe0646c4ad42 /llvm/test/CodeGen/AMDGPU/GlobalISel | |
| parent | d9b5063b25a7d751b4e3dcbb22565fd0d9c285ec (diff) | |
| download | bcm5719-llvm-4e972224c476e05af445130e2b208e9819d220a5.tar.gz, bcm5719-llvm-4e972224c476e05af445130e2b208e9819d220a5.zip | |
AMDGPU/GlobalISel: Refine SMRD selection rules
Fix selecting these for volatile global loads, and ensure the loads
are constant enough.
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/GlobalISel')

| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir | 16 |
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir | 166 |

2 files changed, 150 insertions, 32 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir index 8eaa98d9957..b56a67d5336 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir @@ -243,9 +243,9 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i32_uniform - ; CHECK: (<8 x s32>) = G_LOAD %0(p1) :: (load 32, addrspace 1) + ; CHECK: (<8 x s32>) = G_LOAD %0(p1) :: (invariant load 32, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, addrspace 1) + %1:_(<8 x s32>) = G_LOAD %0 :: (invariant load 32, addrspace 1) ... --- @@ -256,9 +256,9 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v4i64_uniform - ; CHECK: (<4 x s64>) = G_LOAD %0(p1) :: (load 32, addrspace 1) + ; CHECK: (<4 x s64>) = G_LOAD %0(p1) :: (invariant load 32, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, addrspace 1) + %1:_(<4 x s64>) = G_LOAD %0 :: (invariant load 32, addrspace 1) ... --- @@ -269,9 +269,9 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v16i32_uniform - ; CHECK: (<16 x s32>) = G_LOAD %0(p1) :: (load 64, addrspace 1) + ; CHECK: (<16 x s32>) = G_LOAD %0(p1) :: (invariant load 64, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<16 x s32>) = G_LOAD %0 :: (load 64, addrspace 1) + %1:_(<16 x s32>) = G_LOAD %0 :: (invariant load 64, addrspace 1) ... --- @@ -282,9 +282,9 @@ body: | bb.0: liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i64_uniform - ; CHECK: (<8 x s64>) = G_LOAD %0(p1) :: (load 64, addrspace 1) + ; CHECK: (<8 x s64>) = G_LOAD %0(p1) :: (invariant load 64, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 - %1:_(<8 x s64>) = G_LOAD %0 :: (load 64, addrspace 1) + %1:_(<8 x s64>) = G_LOAD %0 :: (invariant load 64, addrspace 1) ... 
--- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir index b7499599ee7..120bbfaba24 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir @@ -1,68 +1,171 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s --- | - define amdgpu_kernel void @load_constant(i32 addrspace(4)* %ptr0) { ret void } - define amdgpu_kernel void @load_global_uniform(i32 addrspace(1)* %ptr1) { + define amdgpu_kernel void @load_constant(i32 addrspace(4)* %ptr0) { + ret void + } + + define amdgpu_kernel void @load_constant_volatile(i32 addrspace(4)* %ptr0) { + ret void + } + + define amdgpu_kernel void @load_global_uniform_invariant(i32 addrspace(1)* %ptr1) { + %tmp0 = load i32, i32 addrspace(1)* %ptr1 + ret void + } + + define amdgpu_kernel void @load_global_uniform_noclobber(i32 addrspace(1)* %ptr1) { + %tmp0 = load i32, i32 addrspace(1)* %ptr1, !amdgpu.noclobber !0 + ret void + } + + define amdgpu_kernel void @load_global_uniform_variant(i32 addrspace(1)* %ptr1) { + %tmp0 = load i32, i32 addrspace(1)* %ptr1 + ret void + } + + define amdgpu_kernel void @load_global_uniform_volatile_invariant(i32 addrspace(1)* %ptr1) { %tmp0 = load i32, i32 addrspace(1)* %ptr1 ret void } + + define amdgpu_kernel void @load_global_uniform_atomic_invariant(i32 addrspace(1)* %ptr1) { + %tmp0 = load i32, i32 addrspace(1)* %ptr1 + ret void + } + define amdgpu_kernel void @load_global_non_uniform(i32 addrspace(1)* %ptr2) { %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0 %tmp1 = getelementptr i32, i32 addrspace(1)* %ptr2, i32 %tmp0 %tmp2 = load i32, i32 addrspace(1)* %tmp1 ret void } + define void @non_power_of_2() { ret void } + + define amdgpu_kernel void @load_constant_v4i16_from_6_align8(<3 x i16> addrspace(4)* %ptr0) { + ret void + 
} + declare i32 @llvm.amdgcn.workitem.id.x() #0 attributes #0 = { nounwind readnone } -... + !0 = !{} +... --- -name : load_constant +name: load_constant legalized: true -# CHECK-LABEL: name: load_constant -# CHECK: registers: -# CHECK: - { id: 0, class: sgpr, preferred-register: '' } -# CHECK: - { id: 1, class: sgpr, preferred-register: '' } - body: | bb.0: liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: load_constant + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (load 4 from %ir.ptr0, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr0) ... --- -name: load_global_uniform +name: load_constant_volatile legalized: true -# CHECK-LABEL: name: load_global_uniform -# CHECK: registers: -# CHECK: - { id: 0, class: sgpr, preferred-register: '' } -# CHECK: - { id: 1, class: sgpr, preferred-register: '' } +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: load_constant_volatile + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (volatile load 4 from %ir.ptr0, addrspace 4) + %0:_(p4) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_LOAD %0 :: (volatile load 4 from %ir.ptr0) +... + +--- +name: load_global_uniform_invariant +legalized: true body: | bb.0: liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: load_global_uniform_invariant + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p1) :: (invariant load 4 from %ir.ptr1, addrspace 1) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_LOAD %0 :: (invariant load 4 from %ir.ptr1) +... 
+ +--- +name: load_global_uniform_noclobber +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: load_global_uniform_noclobber + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load 4 from %ir.ptr1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr1) ... --- -name: load_global_non_uniform +name: load_global_uniform_variant +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: load_global_uniform_variant + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load 4 from %ir.ptr1, addrspace 1) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr1) +... + +--- +name: load_global_uniform_volatile_invariant +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: load_global_uniform_volatile_invariant + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (volatile invariant load 4 from %ir.ptr1, addrspace 1) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_LOAD %0 :: (volatile invariant load 4 from %ir.ptr1) +... 
+ +--- +name: load_global_uniform_atomic_invariant legalized: true -# CHECK-LABEL: name: load_global_non_uniform -# CHECK: registers: -# CHECK: - { id: 0, class: sgpr, preferred-register: '' } -# CHECK: - { id: 1, class: vgpr, preferred-register: '' } -# CHECK: - { id: 2, class: vgpr, preferred-register: '' } +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: load_global_uniform_atomic_invariant + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (invariant load acquire 4 from %ir.ptr1, addrspace 1) + %0:_(p1) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_LOAD %0 :: (invariant load acquire 4 from %ir.ptr1) +... +--- +name: load_global_non_uniform +legalized: true body: | bb.0: liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: load_global_non_uniform + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p1) :: (load 4 from %ir.tmp1, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.tmp1) ... @@ -71,14 +174,29 @@ body: | name: non_power_of_2 legalized: true -# CHECK-LABEL: name: non_power_of_2 -# CHECK: [[S448:%[0-9]+]]:sgpr(s448) = G_IMPLICIT_DEF -# CHECK: sgpr(s32) = G_EXTRACT [[S448]](s448), 0 - body: | bb.0: + ; CHECK-LABEL: name: non_power_of_2 + ; CHECK: [[DEF:%[0-9]+]]:sgpr(s448) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:sgpr(s32) = G_EXTRACT [[DEF]](s448), 0 + ; CHECK: $sgpr0 = COPY [[EXTRACT]](s32) + ; CHECK: SI_RETURN_TO_EPILOG $sgpr0 %0:_(s448) = G_IMPLICIT_DEF %1:_(s32) = G_EXTRACT %0:_(s448), 0 $sgpr0 = COPY %1:_(s32) SI_RETURN_TO_EPILOG $sgpr0 ... 
+ +--- +name: load_constant_v4i16_from_6_align8 +legalized: true + +body: | + bb.0: + ; CHECK-LABEL: name: load_constant_v4i16_from_6_align8 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 6 from %ir.ptr0, align 8, addrspace 4) + %0:_(p4) = COPY $sgpr0_sgpr1 + %1:_(<4 x s16>) = G_LOAD %0 :: (load 6 from %ir.ptr0, align 8, addrspace 4) + +... |

