diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2014-07-21 15:45:01 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2014-07-21 15:45:01 +0000 |
commit | b02094e115fcc94c01b4d274ea24645fd3ea0c58 (patch) | |
tree | 83d43c7b53db71fb76e4727e5f04665851821a2e /llvm/test | |
parent | 42639a57de19504aaae3c1aad13699e8c4005432 (diff) | |
download | bcm5719-llvm-b02094e115fcc94c01b4d274ea24645fd3ea0c58.tar.gz bcm5719-llvm-b02094e115fcc94c01b4d274ea24645fd3ea0c58.zip |
R600/SI: Use scratch memory for large private arrays
llvm-svn: 213551
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/R600/array-ptr-calc-i32.ll | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/R600/gv-const-addrspace.ll | 19 | ||||
-rw-r--r-- | llvm/test/CodeGen/R600/indirect-private-64.ll | 40 | ||||
-rw-r--r-- | llvm/test/CodeGen/R600/private-memory.ll | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/R600/work-item-intrinsics.ll | 10 |
5 files changed, 57 insertions, 37 deletions
diff --git a/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll b/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll index 2ddc61f3972..a2b69782351 100644 --- a/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll +++ b/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll @@ -11,15 +11,18 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate ; SI-LABEL: @test_private_array_ptr_calc: -; SI: V_ADD_I32_e32 [[PTRREG:v[0-9]+]] - -; SI-ALLOCA: V_MOVRELD_B32_e32 {{v[0-9]+}}, [[PTRREG]] +; FIXME: We end up with zero argument for ADD, because +; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index +; with the appropriate offset. We should fold this into the store. +; SI-ALLOCA: V_ADD_I32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}} +; SI-ALLOCA: BUFFER_STORE_DWORD {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], [[PTRREG]] ; ; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this ; alloca to a vector. It currently fails because it does not know how ; to interpret: ; getelementptr [4 x i32]* %alloca, i32 1, i32 %b +; SI-PROMOTE: V_ADD_I32_e32 [[PTRREG:v[0-9]+]] ; SI-PROMOTE: DS_WRITE_B32 {{v[0-9]+}}, [[PTRREG]] define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) { %alloca = alloca [4 x i32], i32 4, align 16 diff --git a/llvm/test/CodeGen/R600/gv-const-addrspace.ll b/llvm/test/CodeGen/R600/gv-const-addrspace.ll index 074d9087ee6..e0ac317f998 100644 --- a/llvm/test/CodeGen/R600/gv-const-addrspace.ll +++ b/llvm/test/CodeGen/R600/gv-const-addrspace.ll @@ -76,3 +76,22 @@ define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) { store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4 ret void } + +define void @gv_addressing_in_branch(float addrspace(1)* %out, i32 %index, i32 %a) { +entry: + %0 = icmp eq i32 0, %a + br i1 %0, label %if, label %else + +if: + %1 = getelementptr inbounds [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index + %2 = load float addrspace(2)* %1 + store float %2, float addrspace(1)* %out + br label %endif + +else: + store float 1.0, float addrspace(1)* %out + br label %endif + +endif: + ret void +} diff --git a/llvm/test/CodeGen/R600/indirect-private-64.ll b/llvm/test/CodeGen/R600/indirect-private-64.ll index 2f628458387..00331e6696d 100644 --- a/llvm/test/CodeGen/R600/indirect-private-64.ll +++ b/llvm/test/CodeGen/R600/indirect-private-64.ll @@ -6,10 +6,10 @@ declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind ; SI-LABEL: @private_access_f64_alloca: -; SI-ALLOCA: V_MOVRELD_B32_e32 -; SI-ALLOCA: V_MOVRELD_B32_e32 -; SI-ALLOCA: V_MOVRELS_B32_e32 -; SI-ALLOCA: V_MOVRELS_B32_e32 +; SI-ALLOCA: BUFFER_STORE_DWORDX2 +; FIXME: We should be able to use BUFFER_LOAD_DWORDX2 +; SI-ALLOCA: BUFFER_LOAD_DWORD +; SI-ALLOCA: BUFFER_LOAD_DWORD ; SI-PROMOTE: DS_WRITE_B64 ; SI-PROMOTE: DS_READ_B64 @@ -26,10 +26,12 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double ; SI-LABEL: @private_access_v2f64_alloca: -; SI-ALLOCA: V_MOVRELD_B32_e32 -; SI-ALLOCA: V_MOVRELD_B32_e32 -; SI-ALLOCA: V_MOVRELS_B32_e32 -; SI-ALLOCA: V_MOVRELS_B32_e32 +; SI-ALLOCA: BUFFER_STORE_DWORDX4 +; FIXME: We should be able to use BUFFER_LOAD_DWORDX4 +; SI-ALLOCA: BUFFER_LOAD_DWORD +; SI-ALLOCA: BUFFER_LOAD_DWORD +; SI-ALLOCA: BUFFER_LOAD_DWORD +; SI-ALLOCA: BUFFER_LOAD_DWORD ; SI-PROMOTE: DS_WRITE_B32 ; SI-PROMOTE: DS_WRITE_B32 @@ -52,10 +54,10 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out ; SI-LABEL: @private_access_i64_alloca: -; SI-ALLOCA: V_MOVRELD_B32_e32 -; SI-ALLOCA: V_MOVRELD_B32_e32 -; SI-ALLOCA: V_MOVRELS_B32_e32 -; SI-ALLOCA: V_MOVRELS_B32_e32 +; SI-ALLOCA: BUFFER_STORE_DWORDX2 +; FIXME: We should be able to use BUFFER_LOAD_DWORDX2 +; SI-ALLOCA: BUFFER_LOAD_DWORD +; SI-ALLOCA: BUFFER_LOAD_DWORD ; SI-PROMOTE: DS_WRITE_B64 ; SI-PROMOTE: DS_READ_B64 @@ -72,14 +74,12 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs ; SI-LABEL: @private_access_v2i64_alloca: -; SI-ALLOCA: V_MOVRELD_B32_e32 -; SI-ALLOCA: V_MOVRELD_B32_e32 -; SI-ALLOCA: V_MOVRELD_B32_e32 -; SI-ALLOCA: V_MOVRELD_B32_e32 -; SI-ALLOCA: V_MOVRELS_B32_e32 -; SI-ALLOCA: V_MOVRELS_B32_e32 -; SI-ALLOCA: V_MOVRELS_B32_e32 -; SI-ALLOCA: V_MOVRELS_B32_e32 +; SI-ALLOCA: BUFFER_STORE_DWORDX4 +; FIXME: We should be able to use BUFFER_LOAD_DWORDX4 +; SI-ALLOCA: BUFFER_LOAD_DWORD +; SI-ALLOCA: BUFFER_LOAD_DWORD +; SI-ALLOCA: BUFFER_LOAD_DWORD +; SI-ALLOCA: BUFFER_LOAD_DWORD ; SI-PROMOTE: DS_WRITE_B32 ; SI-PROMOTE: DS_WRITE_B32 diff --git a/llvm/test/CodeGen/R600/private-memory.ll b/llvm/test/CodeGen/R600/private-memory.ll index 1f34b115c70..3ce8c2cb03d 100644 --- a/llvm/test/CodeGen/R600/private-memory.ll +++ b/llvm/test/CodeGen/R600/private-memory.ll @@ -16,12 +16,8 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; SI-PROMOTE: DS_READ_B32 ; SI-PROMOTE: DS_READ_B32 -; SI-ALLOCA: V_READFIRSTLANE_B32 vcc_lo -; SI-ALLOCA: V_MOVRELD -; SI-ALLOCA: S_CBRANCH -; SI-ALLOCA: V_READFIRSTLANE_B32 vcc_lo -; SI-ALLOCA: V_MOVRELD -; SI-ALLOCA: S_CBRANCH +; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}} +; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}} define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { entry: %stack = alloca [5 x i32], align 4 @@ -120,7 +116,9 @@ for.end: ; R600: MOVA_INT -; SI-PROMOTE: V_MOVRELS_B32_e32 +; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}} +; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}} +; SI_PROMOTE: BUFFER_LOAD_SSHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] + v{{[0-9]+}}, s{{[0-9]+}} define void @short_array(i32 addrspace(1)* %out, i32 %index) { entry: %0 = alloca [2 x i16] @@ -139,8 +137,8 @@ entry: ; R600: MOVA_INT -; SI: V_OR_B32_e32 v{{[0-9]}}, 0x100 -; SI: V_MOVRELS_B32_e32 +; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}, 0x0 +; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}, 0x1 define void @char_array(i32 addrspace(1)* %out, i32 %index) { entry: %0 = alloca [2 x i8] diff --git a/llvm/test/CodeGen/R600/work-item-intrinsics.ll b/llvm/test/CodeGen/R600/work-item-intrinsics.ll index 90079b005bb..01236590742 100644 --- a/llvm/test/CodeGen/R600/work-item-intrinsics.ll +++ b/llvm/test/CodeGen/R600/work-item-intrinsics.ll @@ -127,12 +127,12 @@ entry: ret void } -; The tgid values are stored in ss offset by the number of user ss. -; Currently we always use exactly 2 user ss for the pointer to the +; The tgid values are stored in sgprs offset by the number of user sgprs. +; Currently we always use exactly 2 user sgprs for the pointer to the ; kernel arguments, but this may change in the future. ; SI-CHECK: @tgid_x -; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s2 +; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s4 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @tgid_x (i32 addrspace(1)* %out) { entry: @@ -142,7 +142,7 @@ entry: } ; SI-CHECK: @tgid_y -; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s3 +; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s5 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @tgid_y (i32 addrspace(1)* %out) { entry: @@ -152,7 +152,7 @@ entry: } ; SI-CHECK: @tgid_z -; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s4 +; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s6 ; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]] define void @tgid_z (i32 addrspace(1)* %out) { entry: |