author     Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>   2018-05-15 22:07:51 +0000
committer  Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>   2018-05-15 22:07:51 +0000
commit     57d341c27adfdc13bbf49b068cded3404a26f0af (patch)
tree       98595a2386819ec940752bdf1848a87e74bbd5a5
parent     5455038d98b7c48e21603ef86200847834c0a086 (diff)
[AMDGPU] Fix handling of void types in isLegalAddressingMode
It is legal for the type passed to isLegalAddressingMode to be unsized or, more specifically, VoidTy. In that case we must check the legality of loads/stores for all legal types. Calling getTypeStoreSize directly on such a type is incorrect and leads to breakage, e.g. in Loop Strength Reduction. This change guards against that behaviour.

Differential Revision: https://reviews.llvm.org/D40405

llvm-svn: 332409
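For illustration, the patched condition boils down to the pattern below. This is a minimal self-contained sketch, not the actual SIISelLowering.cpp code: the Type and DataLayout stand-ins and the isSmallGlobalAccess helper are hypothetical and exist only to show the isSized() check short-circuiting before getTypeStoreSize is consulted.

    #include <cstdint>

    // Hypothetical stand-ins for llvm::Type and llvm::DataLayout, only to make
    // the sketch self-contained; real code uses the LLVM classes directly.
    struct Type {
      bool Sized;
      bool isSized() const { return Sized; }  // an unsized type (e.g. VoidTy) reports false
    };
    struct DataLayout {
      // Only meaningful for sized types; querying an unsized type is exactly
      // the situation the patch guards against.
      uint64_t getTypeStoreSize(const Type *) const { return 2; }
    };

    // Hypothetical helper mirroring the patched condition: consult the store
    // size only when the type is sized, so an unsized type never reaches
    // getTypeStoreSize.
    bool isSmallGlobalAccess(const DataLayout &DL, const Type *Ty) {
      return Ty->isSized() && DL.getTypeStoreSize(Ty) < 4;
    }

    int main() {
      DataLayout DL;
      Type VoidLike{/*Sized=*/false};
      // With the guard in place this returns false instead of querying the
      // store size of an unsized type.
      return isSmallGlobalAccess(DL, &VoidLike) ? 1 : 0;
    }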
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp   2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/lsr-void.ll       37
2 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index cca47f58ba5..d34249eefad 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -746,7 +746,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
// will use a MUBUF load.
// FIXME?: We also need to do this if unaligned, but we don't know the
// alignment here.
- if (DL.getTypeStoreSize(Ty) < 4)
+ if (Ty->isSized() && DL.getTypeStoreSize(Ty) < 4)
return isLegalGlobalAddressingMode(AM);
if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS) {
diff --git a/llvm/test/CodeGen/AMDGPU/lsr-void.ll b/llvm/test/CodeGen/AMDGPU/lsr-void.ll
new file mode 100644
index 00000000000..9a32fbec4f5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lsr-void.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s
+
+@array = external addrspace(4) constant [32 x [800 x i32]], align 4
+
+; GCN-LABEL: {{^}}test_lsr_voidty:
+define amdgpu_kernel void @test_lsr_voidty() {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body.i, %entry
+ br label %for.body.i
+
+for.body.i: ; preds = %for.body.i, %for.body
+ %ij = phi i32 [ 0, %for.body ], [ %inc14, %for.body.i ]
+ %tmp = load i32, i32 addrspace(5)* undef, align 4
+ %inc13 = or i32 %ij, 2
+ %shl = shl i32 1, 0
+ %and = and i32 %shl, %tmp
+ %tobool = icmp eq i32 %and, 0
+ %add = mul nuw nsw i32 %inc13, 5
+ %tmp1 = zext i32 %add to i64
+ %arrayidx8 = getelementptr inbounds [32 x [800 x i32]], [32 x [800 x i32]] addrspace(4)* @array, i64 0, i64 undef, i64 %tmp1
+ %tmp2 = load i32, i32 addrspace(4)* %arrayidx8, align 4
+ %and9 = select i1 %tobool, i32 0, i32 %tmp2
+ %xor = xor i32 undef, %and9
+ %inc1 = or i32 %ij, 3
+ %add2 = mul nuw nsw i32 %inc1, 5
+ %add6 = add nuw nsw i32 %add2, 1
+ %tmp3 = zext i32 %add6 to i64
+ %arrayidx9 = getelementptr inbounds [32 x [800 x i32]], [32 x [800 x i32]] addrspace(4)* @array, i64 0, i64 undef, i64 %tmp3
+ %tmp4 = bitcast i32 addrspace(4)* %arrayidx9 to <4 x i32> addrspace(4)*
+ %tmp5 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp4, align 4
+ %reorder_shuffle2 = shufflevector <4 x i32> %tmp5, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %tmp6 = select <4 x i1> undef, <4 x i32> zeroinitializer, <4 x i32> %reorder_shuffle2
+ %inc14 = add nuw nsw i32 %ij, 4
+ br i1 undef, label %for.body, label %for.body.i
+}