summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorJingyue Wu <jingyue@google.com>2015-05-29 17:00:27 +0000
committerJingyue Wu <jingyue@google.com>2015-05-29 17:00:27 +0000
commit995dde27995e3f957966fd52d721d65d60bda43b (patch)
tree07a44f22f2948de96cd0013a08b6a9382661923f /llvm/test
parenta84feb17274882ad0830f5dc538307942db155ab (diff)
downloadbcm5719-llvm-995dde27995e3f957966fd52d721d65d60bda43b.tar.gz
bcm5719-llvm-995dde27995e3f957966fd52d721d65d60bda43b.zip
[NVPTXFavorNonGenericAddrSpaces] recursively trace into GEP and BitCast
Summary: This patch allows NVPTXFavorNonGenericAddrSpaces to remove addrspacecast from longer chains consisting of GEPs and BitCasts. For example, it can now optimize %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]* %1 = gep [10 x float]* %0, i64 0, i64 %i %2 = bitcast float* %1 to i32* %3 = load i32* %2 ; emits ld.u32 to %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)* %3 = load i32 addrspace(3)* %1 ; emits ld.shared.f32 Test Plan: @ld_int_from_global_float in access-non-generic.ll Reviewers: broune, eliben, jholewinski, meheff Subscribers: jholewinski, llvm-commits Differential Revision: http://reviews.llvm.org/D10074 llvm-svn: 238574
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/NVPTX/access-non-generic.ll16
1 files changed, 16 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/NVPTX/access-non-generic.ll b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
index e709302918f..5deefe881e3 100644
--- a/llvm/test/CodeGen/NVPTX/access-non-generic.ll
+++ b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
@@ -85,6 +85,22 @@ define i32 @ld_int_from_float() {
ret i32 %1
}
+define i32 @ld_int_from_global_float(float addrspace(1)* %input, i32 %i, i32 %j) {
+; IR-LABEL: @ld_int_from_global_float(
+; PTX-LABEL: ld_int_from_global_float(
+ %1 = addrspacecast float addrspace(1)* %input to float*
+ %2 = getelementptr float, float* %1, i32 %i
+; IR-NEXT: getelementptr float, float addrspace(1)* %input, i32 %i
+ %3 = getelementptr float, float* %2, i32 %j
+; IR-NEXT: getelementptr float, float addrspace(1)* {{%[^,]+}}, i32 %j
+ %4 = bitcast float* %3 to i32*
+; IR-NEXT: bitcast float addrspace(1)* {{%[^ ]+}} to i32 addrspace(1)*
+ %5 = load i32, i32* %4
+; IR-NEXT: load i32, i32 addrspace(1)* {{%.+}}
+; PTX-LABEL: ld.global
+ ret i32 %5
+}
+
declare void @llvm.cuda.syncthreads() #3
attributes #3 = { noduplicate nounwind }
OpenPOWER on IntegriCloud