summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-05-12 01:58:58 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-05-12 01:58:58 +0000
commita61cb48dd2c40cf4c9b53c8aa00c24543d23cd60 (patch)
tree24c0fccc4629f35e8a0bf694a2a462a5e0ece0ab /llvm/test/CodeGen
parentde60ad33b37e19cbf73b53feed6428886d358f02 (diff)
downloadbcm5719-llvm-a61cb48dd2c40cf4c9b53c8aa00c24543d23cd60.tar.gz
bcm5719-llvm-a61cb48dd2c40cf4c9b53c8aa00c24543d23cd60.zip
AMDGPU: Fix breaking IR on instructions with multiple pointer operands
The promote alloca pass would attempt to promote an alloca with a select, icmp, or phi user, even though the other operand was from a non-promotable source, producing a select on two different pointer types. Only do this if we know that both operands derive from the same alloca. In the future we should be able to relax this to an alloca which will also be promoted. llvm-svn: 269265
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll38
-rw-r--r--llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll170
-rw-r--r--llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll102
3 files changed, 310 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
new file mode 100644
index 00000000000..03c09f063eb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
@@ -0,0 +1,38 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
+
+; This normally would be fixed by instcombine to be compare to the GEP
+; indices
+
+; CHECK-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer(
+; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_same_derived_pointer.alloca, i32 0, i32 %{{[0-9]+}}
+; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
+; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %b
+; CHECK: %cmp = icmp eq i32 addrspace(3)* %ptr0, %ptr1
+define void @lds_promoted_alloca_icmp_same_derived_pointer(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+ %alloca = alloca [16 x i32], align 4
+ %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
+ %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
+ %cmp = icmp eq i32* %ptr0, %ptr1
+ %zext = zext i1 %cmp to i32
+ store volatile i32 %zext, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @lds_promoted_alloca_icmp_unknown_ptr(
+; CHECK: %alloca = alloca [16 x i32], align 4
+; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
+; CHECK: %ptr1 = call i32* @get_unknown_pointer()
+; CHECK: %cmp = icmp eq i32* %ptr0, %ptr1
+define void @lds_promoted_alloca_icmp_unknown_ptr(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+ %alloca = alloca [16 x i32], align 4
+ %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
+ %ptr1 = call i32* @get_unknown_pointer()
+ %cmp = icmp eq i32* %ptr0, %ptr1
+ %zext = zext i1 %cmp to i32
+ store volatile i32 %zext, i32 addrspace(1)* %out
+ ret void
+}
+
+declare i32* @get_unknown_pointer() #0
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
new file mode 100644
index 00000000000..c76cc85a11d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
@@ -0,0 +1,170 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
+
+
+; CHECK-LABEL: @branch_ptr_var_same_alloca(
+; CHECK: getelementptr inbounds [256 x [64 x i32]], [256 x [64 x i32]] addrspace(3)* @branch_ptr_var_same_alloca.alloca, i32 0, i32 %{{[0-9]+}}
+
+; CHECK: if:
+; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
+
+; CHECK: else:
+; CHECK: %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %15, i32 0, i32 %b
+
+; CHECK: endif:
+; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
+; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
+define void @branch_ptr_var_same_alloca(i32 %a, i32 %b) #0 {
+entry:
+ %alloca = alloca [64 x i32], align 4
+ br i1 undef, label %if, label %else
+
+if:
+ %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
+ br label %endif
+
+else:
+ %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %b
+ br label %endif
+
+endif:
+ %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
+ store i32 0, i32* %phi.ptr, align 4
+ ret void
+}
+
+; CHECK-LABEL: @one_phi_value(
+; CHECK: getelementptr inbounds [256 x [64 x i32]], [256 x [64 x i32]] addrspace(3)* @one_phi_value.alloca, i32 0, i32 %14
+; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
+
+; CHECK: br label %exit
+; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %entry ]
+; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
+define void @one_phi_value(i32 %a) #0 {
+entry:
+ %alloca = alloca [64 x i32], align 4
+ %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
+ br label %exit
+
+exit:
+ %phi.ptr = phi i32* [ %arrayidx0, %entry ]
+ store i32 0, i32* %phi.ptr, align 4
+ ret void
+}
+
+; CHECK-LABEL: @branch_ptr_alloca_unknown_obj(
+; CHECK: %alloca = alloca [64 x i32], align 4
+
+; CHECK: if:
+; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
+
+; CHECK: else:
+; CHECK: %arrayidx1 = call i32* @get_unknown_pointer()
+
+; CHECK: endif:
+; CHECK: %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
+; CHECK: store i32 0, i32* %phi.ptr, align 4
+define void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b) #0 {
+entry:
+ %alloca = alloca [64 x i32], align 4
+ br i1 undef, label %if, label %else
+
+if:
+ %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
+ br label %endif
+
+else:
+ %arrayidx1 = call i32* @get_unknown_pointer()
+ br label %endif
+
+endif:
+ %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
+ store i32 0, i32* %phi.ptr, align 4
+ ret void
+}
+
+; kernel void ptr_induction_var_same_alloca(void)
+; {
+; int alloca[64];
+; int i = 0;
+
+; #pragma nounroll
+; for (int* p = &alloca[2], *e = &alloca[48]; p != e; ++p, ++i)
+; {
+; *p = i;
+; }
+; }
+
+; FIXME: This should be promotable. We need to use
+; GetUnderlyingObjects when looking at the icmp user.
+
+; CHECK-LABEL: @ptr_induction_var_same_alloca(
+; CHECK: %alloca = alloca [64 x i32], align 4
+; CHECK: phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
+define void @ptr_induction_var_same_alloca() #0 {
+entry:
+ %alloca = alloca [64 x i32], align 4
+ %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2
+ %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 48
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %p.08 = phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
+ store i32 %i.09, i32* %p.08, align 4
+ %incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1
+ %inc = add nuw nsw i32 %i.09, 1
+ %cmp = icmp eq i32* %incdec.ptr, %arrayidx1
+ br i1 %cmp, label %for.cond.cleanup, label %for.body
+}
+
+
+; extern int* get_unknown_pointer(void);
+
+; kernel void ptr_induction_var_alloca_unknown(void)
+; {
+; int alloca[64];
+; int i = 0;
+;
+; for (int* p = &alloca[2], *e = get_unknown_pointer(); p != e; ++p, ++i)
+; {
+; *p = i;
+; }
+; }
+
+; CHECK-LABEL: @ptr_induction_var_alloca_unknown(
+; CHECK: %alloca = alloca [64 x i32], align 4
+; CHECK: %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
+; CHECK: %cmp = icmp eq i32* %incdec.ptr, %call
+define void @ptr_induction_var_alloca_unknown() #0 {
+entry:
+ %alloca = alloca [64 x i32], align 4
+ %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2
+ %call = tail call i32* @get_unknown_pointer() #2
+ %cmp.7 = icmp eq i32* %arrayidx, %call
+ br i1 %cmp.7, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ ret void
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %i.09 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
+ store i32 %i.09, i32* %p.08, align 4
+ %incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1
+ %inc = add nuw nsw i32 %i.09, 1
+ %cmp = icmp eq i32* %incdec.ptr, %call
+ br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+declare i32* @get_unknown_pointer() #0
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
new file mode 100644
index 00000000000..1446acf8484
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
@@ -0,0 +1,102 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
+
+; CHECK-LABEL: @lds_promoted_alloca_select_invalid_pointer_operand(
+; CHECK: %alloca = alloca i32
+; CHECK: select i1 undef, i32* undef, i32* %alloca
+define void @lds_promoted_alloca_select_invalid_pointer_operand() #0 {
+ %alloca = alloca i32, align 4
+ %select = select i1 undef, i32* undef, i32* %alloca
+ store i32 0, i32* %select, align 4
+ ret void
+}
+
+; CHECK-LABEL: @lds_promote_alloca_select_two_derived_pointers(
+; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promote_alloca_select_two_derived_pointers.alloca, i32 0, i32 %{{[0-9]+}}
+; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
+; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %b
+; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
+; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
+define void @lds_promote_alloca_select_two_derived_pointers(i32 %a, i32 %b) #0 {
+ %alloca = alloca [16 x i32], align 4
+ %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
+ %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
+ %select = select i1 undef, i32* %ptr0, i32* %ptr1
+ store i32 0, i32* %select, align 4
+ ret void
+}
+
+; FIXME: This should be promotable but requires knowing that both will be promoted first.
+
+; CHECK-LABEL: @lds_promote_alloca_select_two_allocas(
+; CHECK: %alloca0 = alloca i32, i32 16, align 4
+; CHECK: %alloca1 = alloca i32, i32 16, align 4
+; CHECK: %ptr0 = getelementptr inbounds i32, i32* %alloca0, i32 %a
+; CHECK: %ptr1 = getelementptr inbounds i32, i32* %alloca1, i32 %b
+; CHECK: %select = select i1 undef, i32* %ptr0, i32* %ptr1
+define void @lds_promote_alloca_select_two_allocas(i32 %a, i32 %b) #0 {
+ %alloca0 = alloca i32, i32 16, align 4
+ %alloca1 = alloca i32, i32 16, align 4
+ %ptr0 = getelementptr inbounds i32, i32* %alloca0, i32 %a
+ %ptr1 = getelementptr inbounds i32, i32* %alloca1, i32 %b
+ %select = select i1 undef, i32* %ptr0, i32* %ptr1
+ store i32 0, i32* %select, align 4
+ ret void
+}
+
+; TODO: Maybe this should be canonicalized to select on the constant and GEP after.
+; CHECK-LABEL: @lds_promote_alloca_select_two_derived_constant_pointers(
+; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promote_alloca_select_two_derived_constant_pointers.alloca, i32 0, i32 %{{[0-9]+}}
+; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 1
+; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 3
+; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
+; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
+define void @lds_promote_alloca_select_two_derived_constant_pointers() #0 {
+ %alloca = alloca [16 x i32], align 4
+ %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 1
+ %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 3
+ %select = select i1 undef, i32* %ptr0, i32* %ptr1
+ store i32 0, i32* %select, align 4
+ ret void
+}
+
+; CHECK-LABEL: @lds_promoted_alloca_select_input_select(
+; CHECK: getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_select_input_select.alloca, i32 0, i32 %{{[0-9]+}}
+; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
+; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %b
+; CHECK: %ptr2 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %c
+; CHECK: %select0 = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
+; CHECK: %select1 = select i1 undef, i32 addrspace(3)* %select0, i32 addrspace(3)* %ptr2
+; CHECK: store i32 0, i32 addrspace(3)* %select1, align 4
+define void @lds_promoted_alloca_select_input_select(i32 %a, i32 %b, i32 %c) #0 {
+ %alloca = alloca [16 x i32], align 4
+ %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
+ %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
+ %ptr2 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %c
+ %select0 = select i1 undef, i32* %ptr0, i32* %ptr1
+ %select1 = select i1 undef, i32* %select0, i32* %ptr2
+ store i32 0, i32* %select1, align 4
+ ret void
+}
+
+define void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c) #0 {
+entry:
+ %alloca = alloca [16 x i32], align 4
+ %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
+ %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
+ store i32 0, i32* %ptr0
+ br i1 undef, label %bb1, label %bb2
+
+bb1:
+ %ptr2 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %c
+ %select0 = select i1 undef, i32* undef, i32* %ptr2
+ store i32 0, i32* %ptr1
+ br label %bb2
+
+bb2:
+ %phi.ptr = phi i32* [ %ptr0, %entry ], [ %select0, %bb1 ]
+ %select1 = select i1 undef, i32* %phi.ptr, i32* %ptr1
+ store i32 0, i32* %select1, align 4
+ ret void
+}
+
+attributes #0 = { norecurse nounwind }
OpenPOWER on IntegriCloud