summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2014-07-21 15:45:01 +0000
committerTom Stellard <thomas.stellard@amd.com>2014-07-21 15:45:01 +0000
commitb02094e115fcc94c01b4d274ea24645fd3ea0c58 (patch)
tree83d43c7b53db71fb76e4727e5f04665851821a2e /llvm/test
parent42639a57de19504aaae3c1aad13699e8c4005432 (diff)
downloadbcm5719-llvm-b02094e115fcc94c01b4d274ea24645fd3ea0c58.tar.gz
bcm5719-llvm-b02094e115fcc94c01b4d274ea24645fd3ea0c58.zip
R600/SI: Use scratch memory for large private arrays
llvm-svn: 213551
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/R600/array-ptr-calc-i32.ll9
-rw-r--r--llvm/test/CodeGen/R600/gv-const-addrspace.ll19
-rw-r--r--llvm/test/CodeGen/R600/indirect-private-64.ll40
-rw-r--r--llvm/test/CodeGen/R600/private-memory.ll16
-rw-r--r--llvm/test/CodeGen/R600/work-item-intrinsics.ll10
5 files changed, 57 insertions, 37 deletions
diff --git a/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll b/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll
index 2ddc61f3972..a2b69782351 100644
--- a/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll
+++ b/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll
@@ -11,15 +11,18 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
; SI-LABEL: @test_private_array_ptr_calc:
-; SI: V_ADD_I32_e32 [[PTRREG:v[0-9]+]]
-
-; SI-ALLOCA: V_MOVRELD_B32_e32 {{v[0-9]+}}, [[PTRREG]]
+; FIXME: We end up with zero argument for ADD, because
+; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index
+; with the appropriate offset. We should fold this into the store.
+; SI-ALLOCA: V_ADD_I32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}}
+; SI-ALLOCA: BUFFER_STORE_DWORD {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], [[PTRREG]]
;
; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
; alloca to a vector. It currently fails because it does not know how
; to interpret:
; getelementptr [4 x i32]* %alloca, i32 1, i32 %b
+; SI-PROMOTE: V_ADD_I32_e32 [[PTRREG:v[0-9]+]]
; SI-PROMOTE: DS_WRITE_B32 {{v[0-9]+}}, [[PTRREG]]
define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
%alloca = alloca [4 x i32], i32 4, align 16
diff --git a/llvm/test/CodeGen/R600/gv-const-addrspace.ll b/llvm/test/CodeGen/R600/gv-const-addrspace.ll
index 074d9087ee6..e0ac317f998 100644
--- a/llvm/test/CodeGen/R600/gv-const-addrspace.ll
+++ b/llvm/test/CodeGen/R600/gv-const-addrspace.ll
@@ -76,3 +76,22 @@ define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4
ret void
}
+
+define void @gv_addressing_in_branch(float addrspace(1)* %out, i32 %index, i32 %a) {
+entry:
+ %0 = icmp eq i32 0, %a
+ br i1 %0, label %if, label %else
+
+if:
+ %1 = getelementptr inbounds [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
+ %2 = load float addrspace(2)* %1
+ store float %2, float addrspace(1)* %out
+ br label %endif
+
+else:
+ store float 1.0, float addrspace(1)* %out
+ br label %endif
+
+endif:
+ ret void
+}
diff --git a/llvm/test/CodeGen/R600/indirect-private-64.ll b/llvm/test/CodeGen/R600/indirect-private-64.ll
index 2f628458387..00331e6696d 100644
--- a/llvm/test/CodeGen/R600/indirect-private-64.ll
+++ b/llvm/test/CodeGen/R600/indirect-private-64.ll
@@ -6,10 +6,10 @@ declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind
; SI-LABEL: @private_access_f64_alloca:
-; SI-ALLOCA: V_MOVRELD_B32_e32
-; SI-ALLOCA: V_MOVRELD_B32_e32
-; SI-ALLOCA: V_MOVRELS_B32_e32
-; SI-ALLOCA: V_MOVRELS_B32_e32
+; SI-ALLOCA: BUFFER_STORE_DWORDX2
+; FIXME: We should be able to use BUFFER_LOAD_DWORDX2
+; SI-ALLOCA: BUFFER_LOAD_DWORD
+; SI-ALLOCA: BUFFER_LOAD_DWORD
; SI-PROMOTE: DS_WRITE_B64
; SI-PROMOTE: DS_READ_B64
@@ -26,10 +26,12 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double
; SI-LABEL: @private_access_v2f64_alloca:
-; SI-ALLOCA: V_MOVRELD_B32_e32
-; SI-ALLOCA: V_MOVRELD_B32_e32
-; SI-ALLOCA: V_MOVRELS_B32_e32
-; SI-ALLOCA: V_MOVRELS_B32_e32
+; SI-ALLOCA: BUFFER_STORE_DWORDX4
+; FIXME: We should be able to use BUFFER_LOAD_DWORDX4
+; SI-ALLOCA: BUFFER_LOAD_DWORD
+; SI-ALLOCA: BUFFER_LOAD_DWORD
+; SI-ALLOCA: BUFFER_LOAD_DWORD
+; SI-ALLOCA: BUFFER_LOAD_DWORD
; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_WRITE_B32
@@ -52,10 +54,10 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out
; SI-LABEL: @private_access_i64_alloca:
-; SI-ALLOCA: V_MOVRELD_B32_e32
-; SI-ALLOCA: V_MOVRELD_B32_e32
-; SI-ALLOCA: V_MOVRELS_B32_e32
-; SI-ALLOCA: V_MOVRELS_B32_e32
+; SI-ALLOCA: BUFFER_STORE_DWORDX2
+; FIXME: We should be able to use BUFFER_LOAD_DWORDX2
+; SI-ALLOCA: BUFFER_LOAD_DWORD
+; SI-ALLOCA: BUFFER_LOAD_DWORD
; SI-PROMOTE: DS_WRITE_B64
; SI-PROMOTE: DS_READ_B64
@@ -72,14 +74,12 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs
; SI-LABEL: @private_access_v2i64_alloca:
-; SI-ALLOCA: V_MOVRELD_B32_e32
-; SI-ALLOCA: V_MOVRELD_B32_e32
-; SI-ALLOCA: V_MOVRELD_B32_e32
-; SI-ALLOCA: V_MOVRELD_B32_e32
-; SI-ALLOCA: V_MOVRELS_B32_e32
-; SI-ALLOCA: V_MOVRELS_B32_e32
-; SI-ALLOCA: V_MOVRELS_B32_e32
-; SI-ALLOCA: V_MOVRELS_B32_e32
+; SI-ALLOCA: BUFFER_STORE_DWORDX4
+; FIXME: We should be able to use BUFFER_LOAD_DWORDX4
+; SI-ALLOCA: BUFFER_LOAD_DWORD
+; SI-ALLOCA: BUFFER_LOAD_DWORD
+; SI-ALLOCA: BUFFER_LOAD_DWORD
+; SI-ALLOCA: BUFFER_LOAD_DWORD
; SI-PROMOTE: DS_WRITE_B32
; SI-PROMOTE: DS_WRITE_B32
diff --git a/llvm/test/CodeGen/R600/private-memory.ll b/llvm/test/CodeGen/R600/private-memory.ll
index 1f34b115c70..3ce8c2cb03d 100644
--- a/llvm/test/CodeGen/R600/private-memory.ll
+++ b/llvm/test/CodeGen/R600/private-memory.ll
@@ -16,12 +16,8 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI-PROMOTE: DS_READ_B32
; SI-PROMOTE: DS_READ_B32
-; SI-ALLOCA: V_READFIRSTLANE_B32 vcc_lo
-; SI-ALLOCA: V_MOVRELD
-; SI-ALLOCA: S_CBRANCH
-; SI-ALLOCA: V_READFIRSTLANE_B32 vcc_lo
-; SI-ALLOCA: V_MOVRELD
-; SI-ALLOCA: S_CBRANCH
+; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
+; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry:
%stack = alloca [5 x i32], align 4
@@ -120,7 +116,9 @@ for.end:
; R600: MOVA_INT
-; SI-PROMOTE: V_MOVRELS_B32_e32
+; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
+; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
+; SI_PROMOTE: BUFFER_LOAD_SSHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] + v{{[0-9]+}}, s{{[0-9]+}}
define void @short_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = alloca [2 x i16]
@@ -139,8 +137,8 @@ entry:
; R600: MOVA_INT
-; SI: V_OR_B32_e32 v{{[0-9]}}, 0x100
-; SI: V_MOVRELS_B32_e32
+; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}, 0x0
+; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}, 0x1
define void @char_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = alloca [2 x i8]
diff --git a/llvm/test/CodeGen/R600/work-item-intrinsics.ll b/llvm/test/CodeGen/R600/work-item-intrinsics.ll
index 90079b005bb..01236590742 100644
--- a/llvm/test/CodeGen/R600/work-item-intrinsics.ll
+++ b/llvm/test/CodeGen/R600/work-item-intrinsics.ll
@@ -127,12 +127,12 @@ entry:
ret void
}
-; The tgid values are stored in ss offset by the number of user ss.
-; Currently we always use exactly 2 user ss for the pointer to the
+; The tgid values are stored in sgprs offset by the number of user sgprs.
+; Currently we always use exactly 2 user sgprs for the pointer to the
; kernel arguments, but this may change in the future.
; SI-CHECK: @tgid_x
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s2
+; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s4
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @tgid_x (i32 addrspace(1)* %out) {
entry:
@@ -142,7 +142,7 @@ entry:
}
; SI-CHECK: @tgid_y
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s3
+; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s5
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @tgid_y (i32 addrspace(1)* %out) {
entry:
@@ -152,7 +152,7 @@ entry:
}
; SI-CHECK: @tgid_z
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s4
+; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s6
; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
define void @tgid_z (i32 addrspace(1)* %out) {
entry:
OpenPOWER on IntegriCloud