summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
author: Tom Stellard <thomas.stellard@amd.com> 2016-12-09 15:57:15 +0000
committer: Tom Stellard <thomas.stellard@amd.com> 2016-12-09 15:57:15 +0000
commit: 2a48433fcf29c943e2fa90aca355fdf477cbe8b6 (patch)
tree: f0c3dfbb6b471be9207f984fb66fb216edebd18a /llvm/test/CodeGen
parent: bf9c0e7434f7822f823ea60ad0cb05b34bd56e2a (diff)
download: bcm5719-llvm-2a48433fcf29c943e2fa90aca355fdf477cbe8b6.tar.gz
download: bcm5719-llvm-2a48433fcf29c943e2fa90aca355fdf477cbe8b6.zip
AMDGPU/SI: Don't mark VINTRP instructions as mayLoad
Summary:
These instructions technically do read from memory, but the memory is
considered to be out of bounds for normal load/store instructions.

shader-db stats:
SGPRS: 1416075 -> 1413323 (-0.19 %)
VGPRS: 867413 -> 863935 (-0.40 %)
Spilled SGPRs: 1409 -> 1354 (-3.90 %)
Spilled VGPRs: 63 -> 63 (0.00 %)
Private memory VGPRs: 880 -> 880 (0.00 %)
Scratch size: 2648 -> 2632 (-0.60 %) dwords per thread
Code Size: 37889052 -> 37897340 (0.02 %) bytes
LDS: 2147 -> 2147 (0.00 %) blocks
Max Waves: 279243 -> 280369 (0.40 %)
Wait states: 0 -> 0 (0.00 %)

Reviewers: nhaehnle, mareko, arsenm
Subscribers: kzhuravl, wdng, yaxunl, tony-tye
Differential Revision: https://reviews.llvm.org/D27593
llvm-svn: 289219
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll | 8
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll | 16
2 files changed, 19 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll b/llvm/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll
index ca1faebb77e..9e7c3c2e620 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll
@@ -5,10 +5,10 @@
;GCN-LABEL: {{^}}main:
;GCN-NOT: s_wqm
-;GCN: s_mov_b32
-;GCN-NEXT: v_interp_mov_f32
-;GCN: v_interp_p1_f32
-;GCN: v_interp_p2_f32
+;GCN: s_mov_b32 m0
+;GCN-DAG: v_interp_mov_f32
+;GCN-DAG: v_interp_p1_f32
+;GCN-DAG: v_interp_p2_f32
define amdgpu_ps void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) {
main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
index 9613a5082d3..adf1593c3fa 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
@@ -1,5 +1,5 @@
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s
+;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=GCN,VI %s
;GCN-LABEL: {{^}}v_interp:
;GCN-NOT: s_wqm
@@ -20,6 +20,20 @@ main_body:
ret void
}
+; SI won't merge ds memory operations, because of the signed offset bug, so
+; we only have check lines for VI.
+; VI-LABEL: v_interp_readnone:
+; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
+; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
+define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) {
+ store float 0.0, float addrspace(3)* %lds
+ %tmp1 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 0)
+ %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
+ store float 0.0, float addrspace(3)* %tmp2
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp1, float %tmp1, float %tmp1, float %tmp1)
+ ret void
+}
+
; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
OpenPOWER on IntegriCloud