diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-08-07 20:18:34 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-08-07 20:18:34 +0000 |
commit | 711b390a7c7854679388053e543273c56c25196e (patch) | |
tree | 2d4f9a1816b086af1059895e0c9da01716aa074a /llvm/test | |
parent | ee0b2b5c2b9a7e8af3e278b5f9f4aaf10827754f (diff) | |
download | bcm5719-llvm-711b390a7c7854679388053e543273c56c25196e.tar.gz bcm5719-llvm-711b390a7c7854679388053e543273c56c25196e.zip |
AMDGPU: Assume SMRD access for constant address space
Since r243294, loads from the constant address space are selected to SMRD
instructions and moved to the VALU later if required.
llvm-svn: 244354
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll | 32 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll | 256 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/salu-to-valu.ll | 32 |
3 files changed, 286 insertions, 34 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll new file mode 100644 index 00000000000..4d70ba83781 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll @@ -0,0 +1,32 @@ +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +; OPT-LABEL: @test_no_sink_flat_small_offset_i32( +; OPT: getelementptr i32, i32 addrspace(4)* %in +; OPT: br i1 +; OPT-NOT: ptrtoint + +; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32: +; GCN: flat_load_dword +; GCN: {{^}}BB0_2: +define void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(4)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(4)* %out.gep + br label %done + +done: + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll index a68d110fdc9..5cb6cc3ff67 100644 --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll @@ -1,5 +1,7 @@ +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s ; RUN: opt -S -codegenprepare 
-mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s @@ -115,35 +117,6 @@ done: ret void } -; OPT-LABEL: @test_no_sink_flat_small_offset_i32( -; OPT: getelementptr i32, i32 addrspace(4)* %in -; OPT: br i1 -; OPT-NOT: ptrtoint - -; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32: -; GCN: flat_load_dword -; GCN: {{^}}BB4_2: - -define void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) { -entry: - %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7 - %tmp0 = icmp eq i32 %cond, 0 - br i1 %tmp0, label %endif, label %if - -if: - %tmp1 = load i32, i32 addrspace(4)* %in.gep - br label %endif - -endif: - %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(4)* %out.gep - br label %done - -done: - ret void -} - ; OPT-LABEL: @test_sink_scratch_small_offset_i32( ; OPT-NOT: getelementptr [512 x i32] ; OPT: br i1 @@ -153,7 +126,7 @@ done: ; GCN: s_and_saveexec_b64 ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}} ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}} -; GCN: {{^}}BB5_2: +; GCN: {{^}}BB4_2: define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) { entry: %alloca = alloca [512 x i32], align 4 @@ -189,7 +162,7 @@ done: ; GCN: 
s_and_saveexec_b64 ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} -; GCN: {{^}}BB6_2: +; GCN: {{^}}BB5_2: define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) { entry: %alloca = alloca [512 x i32], align 4 @@ -222,7 +195,7 @@ done: ; GCN: s_and_saveexec_b64 ; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] -; GCN: {{^}}BB7_2: +; GCN: {{^}}BB6_2: define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) { entry: %offset.ext = zext i32 %offset to i64 @@ -246,3 +219,222 @@ done: attributes #0 = { nounwind readnone } attributes #1 = { nounwind } + + + +; OPT-LABEL: @test_sink_constant_small_offset_i32 +; OPT-NOT: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32: +; GCN: s_and_saveexec_b64 +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}} +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32 +; OPT-NOT: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32: +; GCN: s_and_saveexec_b64 +; SI: s_load_dword s{{[0-9]+}}, 
{{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}} +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32 +; OPT-SI: getelementptr i32, i32 addrspace(2)* +; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)* +; OPT-VI-NOT: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32: +; GCN: s_and_saveexec_b64 +; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400 + +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32 +; OPT-SI: getelementptr i32, i32 addrspace(2)* +; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32: +; GCN: s_and_saveexec_b64 +; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 3{{$}} +; GCN-DAG: s_mov_b32 s{{[0-9]+}}, -4{{$}} +; GCN: s_add_u32 +; GCN: s_addc_u32 +; SI: s_load_dword 
s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32 +; OPT: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32: +; GCN: s_and_saveexec_b64 +; GCN: s_add_u32 +; GCN: s_addc_u32 +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32: +; GCN: s_and_saveexec_b64 +; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}} +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} + +; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}} +; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}} + +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 
%cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32 +; OPT-SI: getelementptr i32, i32 addrspace(2)* +; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)* +; OPT-VI: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32: +; GCN: s_and_saveexec_b64 +; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}} +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} + +; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}} + +; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}} +; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} + +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll index 0b964957654..be4dc9a33b0 100644 --- a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -75,10 +75,10 @@ endif: ret void } -; Test moving ann SMRD with an immediate offset to the VALU +; Test moving an SMRD 
with an immediate offset to the VALU ; CHECK-LABEL: {{^}}smrd_valu2: -; CHECK: buffer_load_dword +; CHECK: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) { entry: %0 = call i32 @llvm.r600.read.tidig.x() nounwind readnone @@ -89,6 +89,34 @@ entry: ret void } +; CHECK-LABEL: {{^}}smrd_valu2_max_smrd_offset: +; CHECK: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}} +define void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) { +entry: + %0 = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %1 = add i32 %0, 4 + %2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %0, i32 255 + %3 = load i32, i32 addrspace(2)* %2 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; Offset is too big to fit in SMRD 8-bit offset, but small enough to +; fit in MUBUF offset. +; FIXME: We should be using the offset but we don't + +; CHECK-LABEL: {{^}}smrd_valu2_mubuf_offset: +; CHECK: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +define void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) { +entry: + %0 = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %1 = add i32 %0, 4 + %2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %0, i32 256 + %3 = load i32, i32 addrspace(2)* %2 + store i32 %3, i32 addrspace(1)* %out + ret void +} + ; CHECK-LABEL: {{^}}s_load_imm_v8i32: ; CHECK: buffer_load_dwordx4 ; CHECK: buffer_load_dwordx4 |