summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/smrd.ll
diff options
context:
space:
mode:
author: Marek Olsak <marek.olsak@amd.com> 2017-11-09 01:52:17 +0000
committer: Marek Olsak <marek.olsak@amd.com> 2017-11-09 01:52:17 +0000
commit: ffadcb744bc3d8ce0ca516bab09445643c1061a5 (patch)
tree: 793f4419ec97e06c11200049c96cf0139c6eb1ad /llvm/test/CodeGen/AMDGPU/smrd.ll
parent: 8a94f220aa8f728a87fcc8166838bf1c7315b73e (diff)
download: bcm5719-llvm-ffadcb744bc3d8ce0ca516bab09445643c1061a5.tar.gz
bcm5719-llvm-ffadcb744bc3d8ce0ca516bab09445643c1061a5.zip
AMDGPU: Fold immediate offset into BUFFER_LOAD_DWORD lowered from SMEM
Summary:
-5.3% code size in affected shaders.

Changed stats only: 48486 shaders in 30489 tests
Totals:
SGPRS: 2086406 -> 2072430 (-0.67 %)
VGPRS: 1626872 -> 1627960 (0.07 %)
Spilled SGPRs: 7865 -> 7912 (0.60 %)
Code Size: 60978060 -> 60188764 (-1.29 %) bytes
Max Waves: 374530 -> 374342 (-0.05 %)

Totals from affected shaders:
SGPRS: 299664 -> 285688 (-4.66 %)
VGPRS: 233844 -> 234932 (0.47 %)
Spilled SGPRs: 3959 -> 4006 (1.19 %)
Code Size: 14905272 -> 14115976 (-5.30 %) bytes
Max Waves: 46202 -> 46014 (-0.41 %)

Reviewers: arsenm, nhaehnle

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye

Differential Revision: https://reviews.llvm.org/D38915

llvm-svn: 317750
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/smrd.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/smrd.ll 21
1 files changed, 21 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll
index cc2f6ce76b5..ab7e7422d57 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/smrd.ll
@@ -191,6 +191,27 @@ main_body:
ret float %r
}
+; GCN-LABEL: {{^}}smrd_vgpr_offset_imm:
+; GCN-NEXT: BB#
+; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;
+define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 { ; the check lines above expect %desc in s[0:3] and %offset in v0
+main_body:
+ %off = add i32 %offset, 4095 ; this add is expected to disappear, with 4095 folded into the load's immediate offset field (presumably the largest value that field can hold - confirm against the ISA manual)
+ %r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %off) ; expected to be emitted as a single buffer_load_dword using offen addressing
+ ret float %r
+}
+
+; GCN-LABEL: {{^}}smrd_vgpr_offset_imm_too_large:
+; GCN-NEXT: BB#
+; GCN-NEXT: v_add_i32_e32 v0, vcc, 0x1000, v0
+; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ;
+define amdgpu_ps float @smrd_vgpr_offset_imm_too_large(<4 x i32> inreg %desc, i32 %offset) #0 { ; negative counterpart of smrd_vgpr_offset_imm, where the constant must not be folded
+main_body:
+ %off = add i32 %offset, 4096 ; 4096 (0x1000) is one more than the value folded in smrd_vgpr_offset_imm; the check lines expect an explicit v_add_i32 to remain
+ %r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %off) ; expected to be emitted as buffer_load_dword with offen and no immediate offset field
+ ret float %r
+}
+
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
OpenPOWER on IntegriCloud