summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
author Matt Arsenault <Matthew.Arsenault@amd.com> 2014-10-15 18:06:43 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com> 2014-10-15 18:06:43 +0000
commit 1a74aff84653ec6c428df97fc09fc8cbcb82d334 (patch)
tree 210cf7548b4befd6e0e62cf22215720d11571ba3 /llvm/test
parent 6c386e229b3117eee899186dc8bd4eff46f9d24d (diff)
download bcm5719-llvm-1a74aff84653ec6c428df97fc09fc8cbcb82d334.tar.gz
bcm5719-llvm-1a74aff84653ec6c428df97fc09fc8cbcb82d334.zip
R600/SI: Also try to use 0 base for misaligned 8-byte DS loads.
llvm-svn: 219823
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/R600/ds_read2.ll 30
-rw-r--r-- llvm/test/CodeGen/R600/ds_write2.ll 26
2 files changed, 56 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/R600/ds_read2.ll b/llvm/test/CodeGen/R600/ds_read2.ll
index 74d3a598b96..388d21ba08c 100644
--- a/llvm/test/CodeGen/R600/ds_read2.ll
+++ b/llvm/test/CodeGen/R600/ds_read2.ll
@@ -406,6 +406,36 @@ define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
ret void
}
+@bar = addrspace(3) global [4 x i64] zeroinitializer, align 4 ; only 4-byte aligned, so the i64 loads below are under-aligned
+
+; SI-LABEL: @load_misaligned64_constant_offsets
+; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
+; SI: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
+define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) { ; checks above expect both i64 loads to become DS_READ2_B32 pairs sharing one zero base register
+ %val0 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4 ; element 0: bytes 0-7 of @bar, expected as offset0:0 offset1:1
+ %val1 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4 ; element 1: bytes 8-15 of @bar, expected as offset0:2 offset1:3
+ %sum = add i64 %val0, %val1 ; both loaded values feed the stored result
+ store i64 %sum, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+@bar.large = addrspace(3) global [4096 x i64] zeroinitializer, align 4 ; only 4-byte aligned, so the i64 loads below are under-aligned
+
+; SI-LABEL: @load_misaligned64_constant_large_offsets
+; SI-DAG: V_MOV_B32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
+; SI-DAG: V_MOV_B32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
+; SI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
+; SI-DAG: DS_READ2_B32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
+; SI: S_ENDPGM
+define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) { ; checks above expect each load to use its own materialized base register with offset0:0 offset1:1
+ %val0 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4 ; element 2048: byte offset 2048 * 8 = 0x4000
+ %val1 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4 ; element 4095: byte offset 4095 * 8 = 0x7ff8
+ %sum = add i64 %val0, %val1 ; both loaded values feed the stored result
+ store i64 %sum, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] zeroinitializer, align 4
@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] zeroinitializer, align 4
diff --git a/llvm/test/CodeGen/R600/ds_write2.ll b/llvm/test/CodeGen/R600/ds_write2.ll
index 6e5bcffb621..99876f9ce07 100644
--- a/llvm/test/CodeGen/R600/ds_write2.ll
+++ b/llvm/test/CodeGen/R600/ds_write2.ll
@@ -341,6 +341,32 @@ define void @store_constant_disjoint_offsets() {
ret void
}
+@bar = addrspace(3) global [4 x i64] zeroinitializer, align 4 ; only 4-byte aligned, so the i64 stores below are under-aligned
+
+; SI-LABEL: @store_misaligned64_constant_offsets
+; SI: V_MOV_B32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: DS_WRITE2_B32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
+define void @store_misaligned64_constant_offsets() { ; checks above expect both i64 stores to become DS_WRITE2_B32 pairs sharing one zero base register
+ store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4 ; element 0: bytes 0-7 of @bar, expected as offset0:0 offset1:1
+ store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4 ; element 1: bytes 8-15 of @bar, expected as offset0:2 offset1:3
+ ret void
+}
+
+@bar.large = addrspace(3) global [4096 x i64] zeroinitializer, align 4 ; only 4-byte aligned, so the i64 stores below are under-aligned
+
+; SI-LABEL: @store_misaligned64_constant_large_offsets
+; SI-DAG: V_MOV_B32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
+; SI-DAG: V_MOV_B32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
+; SI-DAG: DS_WRITE2_B32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI-DAG: DS_WRITE2_B32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: S_ENDPGM
+define void @store_misaligned64_constant_large_offsets() { ; checks above expect each store to use its own materialized base register with offset0:0 offset1:1
+ store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4 ; element 2048: byte offset 2048 * 8 = 0x4000
+ store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4 ; element 4095: byte offset 4095 * 8 = 0x7ff8
+ ret void
+}
+
@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] zeroinitializer, align 4
@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] zeroinitializer, align 4
OpenPOWER on IntegriCloud