summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/InferAddressSpaces/AMDGPU
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2017-04-28 22:52:41 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2017-04-28 22:52:41 +0000
commite0f9e984fd7e6c02016b16eb355e788a645091ee (patch)
tree763a00adc11bd16764b001e63dac17bc37e251f4 /llvm/test/Transforms/InferAddressSpaces/AMDGPU
parente2f11690fe01335ac311abdd612d194764f44afc (diff)
downloadbcm5719-llvm-e0f9e984fd7e6c02016b16eb355e788a645091ee.tar.gz
bcm5719-llvm-e0f9e984fd7e6c02016b16eb355e788a645091ee.zip
InferAddressSpaces: Search constant expressions for addrspacecasts
These are pretty common when using local memory, and the 64-bit generic addressing is much more expensive to compute. llvm-svn: 301711
Diffstat (limited to 'llvm/test/Transforms/InferAddressSpaces/AMDGPU')
-rw-r--r--llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll21
1 files changed, 19 insertions, 2 deletions
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
index 8b482ba8a29..e2c255dcb3e 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
@@ -15,9 +15,8 @@ define void @simplified_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
ret void
}
-; FIXME: Should be able to eliminate inner constantexpr addrspacecast.
; CHECK-LABEL: @constexpr_gep_addrspacecast(
-; CHECK: %gep0 = getelementptr inbounds double, double addrspace(3)* addrspacecast (double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384) to double addrspace(3)*), i64 %idx0
+; CHECK-NEXT: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8
define void @constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
%gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
@@ -54,3 +53,21 @@ define amdgpu_kernel void @vector_gep(<4 x [1024 x i32] addrspace(3)*> %array) n
store i32 99, i32 addrspace(4)* %p3
ret void
}
+
+; CHECK-LABEL: @repeated_constexpr_gep_addrspacecast(
+; CHECK-NEXT: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
+; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8
+; CHECK-NEXT: %gep1 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx1
+; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep1, align 8
+; CHECK-NEXT: ret void
+define void @repeated_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
+ %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
+ %asc0 = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
+ store double 1.0, double addrspace(3)* %asc0, align 8
+
+ %gep1 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx1
+ %asc1 = addrspacecast double addrspace(4)* %gep1 to double addrspace(3)*
+ store double 1.0, double addrspace(3)* %asc1, align 8
+
+ ret void
+}
OpenPOWER on IntegriCloud