summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp99
-rw-r--r--llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll38
-rw-r--r--llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll170
-rw-r--r--llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll102
4 files changed, 401 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 396f01e441e..ae1802a83d5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -32,6 +32,7 @@ class AMDGPUPromoteAlloca : public FunctionPass {
private:
const TargetMachine *TM;
Module *Mod;
+ const DataLayout *DL;
MDNode *MaxWorkGroupSizeRange;
// FIXME: This should be per-kernel.
@@ -43,6 +44,20 @@ private:
std::pair<Value *, Value *> getLocalSizeYZ(IRBuilder<> &Builder);
Value *getWorkitemID(IRBuilder<> &Builder, unsigned N);
+ /// BaseAlloca is the alloca root the search started from.
+ /// Val may be that alloca or a recursive user of it.
+ bool collectUsesWithPtrTypes(Value *BaseAlloca,
+ Value *Val,
+ std::vector<Value*> &WorkList) const;
+
+ /// Val is a derived pointer from Alloca. OpIdx0/OpIdx1 are the operand
+ /// indices to an instruction with 2 pointer inputs (e.g. select, icmp).
+ /// Returns true if both operands are derived from the same alloca. Val should
+ /// be the same value as one of the input operands of UseInst.
+ bool binaryOpIsDerivedFromSameAlloca(Value *Alloca, Value *Val,
+ Instruction *UseInst,
+ int OpIdx0, int OpIdx1) const;
+
public:
static char ID;
@@ -50,6 +65,7 @@ public:
FunctionPass(ID),
TM(TM_),
Mod(nullptr),
+ DL(nullptr),
MaxWorkGroupSizeRange(nullptr),
LocalMemAvailable(0),
IsAMDGCN(false),
@@ -63,6 +79,11 @@ public:
}
void handleAlloca(AllocaInst &I);
+
+ /// Report this pass's analysis usage: the CFG is marked as preserved, and
+ /// everything else is deferred to the FunctionPass implementation.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
};
} // End anonymous namespace
@@ -80,6 +101,7 @@ bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
return false;
Mod = &M;
+ DL = &Mod->getDataLayout();
// The maximum workitem id.
//
@@ -131,8 +153,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
continue;
if (Use->getParent()->getParent() == &F) {
- LocalMemAvailable -=
- Mod->getDataLayout().getTypeAllocSize(GV.getValueType());
+ LocalMemAvailable -= DL->getTypeAllocSize(GV.getValueType());
break;
}
}
@@ -428,7 +449,39 @@ static bool isCallPromotable(CallInst *CI) {
}
}
-static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
+/// Decide whether the operand of \p Inst that sits opposite \p Val has
+/// \p BaseAlloca as its underlying object. The operand at \p OpIdx0 is
+/// examined unless it is \p Val itself, in which case the operand at
+/// \p OpIdx1 is examined instead.
+bool AMDGPUPromoteAlloca::binaryOpIsDerivedFromSameAlloca(
+    Value *BaseAlloca, Value *Val, Instruction *Inst, int OpIdx0,
+    int OpIdx1) const {
+  // Select the operand we are not already tracking; that is the one which
+  // might not end up being promoted.
+  Value *FirstOp = Inst->getOperand(OpIdx0);
+  Value *Partner = (FirstOp == Val) ? Inst->getOperand(OpIdx1) : FirstOp;
+
+  // TODO: We should be able to replace undefs with the right pointer type.
+  Value *PartnerRoot = GetUnderlyingObject(Partner, *DL);
+  if (!isa<AllocaInst>(PartnerRoot))
+    return false;
+
+  if (PartnerRoot == BaseAlloca)
+    return true;
+
+  // TODO: If the other base object is a different alloca that is also
+  // promotable, this could be allowed. What matters is that both pointers
+  // end up in the same address space.
+  DEBUG(dbgs() << "Found a binary instruction with another alloca object\n");
+  return false;
+}
+
+
+bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
+ Value *BaseAlloca,
+ Value *Val,
+ std::vector<Value*> &WorkList) const {
+
for (User *User : Val->users()) {
if (std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
continue;
@@ -441,11 +494,11 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
continue;
}
- Instruction *UseInst = dyn_cast<Instruction>(User);
- if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt)
+ Instruction *UseInst = cast<Instruction>(User);
+ if (UseInst->getOpcode() == Instruction::PtrToInt)
return false;
- if (StoreInst *SI = dyn_cast_or_null<StoreInst>(UseInst)) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(UseInst)) {
if (SI->isVolatile())
return false;
@@ -464,6 +517,13 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
return false;
}
+ // Only promote an icmp if we know that the other compare operand
+ // is derived from the same alloca and so will also be promoted.
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
+ return false;
+ }
+
if (!User->getType()->isPointerTy())
continue;
@@ -474,8 +534,31 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
return false;
}
+ // Only promote a select if we know that the other select operand is from
+ // another pointer that will also be promoted.
+ if (SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
+ if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
+ return false;
+ }
+
+ // Repeat for phis.
+ if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
+ // TODO: Handle more complex cases. We should be able to replace loops
+ // over arrays.
+ switch (Phi->getNumIncomingValues()) {
+ case 1:
+ break;
+ case 2:
+ if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
+ return false;
+ break;
+ default:
+ return false;
+ }
+ }
+
WorkList.push_back(User);
- if (!collectUsesWithPtrTypes(User, WorkList))
+ if (!collectUsesWithPtrTypes(BaseAlloca, User, WorkList))
return false;
}
@@ -516,7 +599,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
std::vector<Value*> WorkList;
- if (!collectUsesWithPtrTypes(&I, WorkList)) {
+ if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
DEBUG(dbgs() << " Do not know how to convert all uses\n");
return;
}
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
new file mode 100644
index 00000000000..03c09f063eb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
@@ -0,0 +1,38 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
+
+; This would normally be folded by instcombine into a compare of the GEP
+; indices.
+
+; CHECK-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer(
+; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_same_derived_pointer.alloca, i32 0, i32 %{{[0-9]+}}
+; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
+; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %b
+; CHECK: %cmp = icmp eq i32 addrspace(3)* %ptr0, %ptr1
+define void @lds_promoted_alloca_icmp_same_derived_pointer(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+ %alloca = alloca [16 x i32], align 4
+ %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
+ %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
+ %cmp = icmp eq i32* %ptr0, %ptr1
+ %zext = zext i1 %cmp to i32
+ store volatile i32 %zext, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @lds_promoted_alloca_icmp_unknown_ptr(
+; CHECK: %alloca = alloca [16 x i32], align 4
+; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
+; CHECK: %ptr1 = call i32* @get_unknown_pointer()
+; CHECK: %cmp = icmp eq i32* %ptr0, %ptr1
+define void @lds_promoted_alloca_icmp_unknown_ptr(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+ %alloca = alloca [16 x i32], align 4
+ %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
+ %ptr1 = call i32* @get_unknown_pointer()
+ %cmp = icmp eq i32* %ptr0, %ptr1
+ %zext = zext i1 %cmp to i32
+ store volatile i32 %zext, i32 addrspace(1)* %out
+ ret void
+}
+
+declare i32* @get_unknown_pointer() #0
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
new file mode 100644
index 00000000000..c76cc85a11d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
@@ -0,0 +1,170 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
+
+
+; Both phi inputs are GEPs of the same alloca; the CHECK lines expect every
+; derived pointer, the phi and the store to be rewritten to addrspace(3).
+; CHECK-LABEL: @branch_ptr_var_same_alloca(
+; CHECK: getelementptr inbounds [256 x [64 x i32]], [256 x [64 x i32]] addrspace(3)* @branch_ptr_var_same_alloca.alloca, i32 0, i32 %{{[0-9]+}}
+
+; CHECK: if:
+; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
+
+; CHECK: else:
+; CHECK: %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %b
+
+; CHECK: endif:
+; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
+; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
+define void @branch_ptr_var_same_alloca(i32 %a, i32 %b) #0 {
+entry:
+ %alloca = alloca [64 x i32], align 4
+ br i1 undef, label %if, label %else
+
+if:
+ %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
+ br label %endif
+
+else:
+ %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %b
+ br label %endif
+
+endif:
+ %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
+ store i32 0, i32* %phi.ptr, align 4
+ ret void
+}
+
+; A single-input phi of a pointer derived from the alloca; the CHECK lines
+; expect the GEP, the phi and the store to be rewritten to addrspace(3).
+; CHECK-LABEL: @one_phi_value(
+; CHECK: getelementptr inbounds [256 x [64 x i32]], [256 x [64 x i32]] addrspace(3)* @one_phi_value.alloca, i32 0, i32 %{{[0-9]+}}
+; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
+
+; CHECK: br label %exit
+; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %entry ]
+; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
+define void @one_phi_value(i32 %a) #0 {
+entry:
+ %alloca = alloca [64 x i32], align 4
+ %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
+ br label %exit
+
+exit:
+ %phi.ptr = phi i32* [ %arrayidx0, %entry ]
+ store i32 0, i32* %phi.ptr, align 4
+ ret void
+}
+
+; CHECK-LABEL: @branch_ptr_alloca_unknown_obj(
+; CHECK: %alloca = alloca [64 x i32], align 4
+
+; CHECK: if:
+; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
+
+; CHECK: else:
+; CHECK: %arrayidx1 = call i32* @get_unknown_pointer()
+
+; CHECK: endif:
+; CHECK: %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
+; CHECK: store i32 0, i32* %phi.ptr, align 4
+define void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b) #0 {
+entry:
+ %alloca = alloca [64 x i32], align 4
+ br i1 undef, label %if, label %else
+
+if:
+ %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
+ br label %endif
+
+else:
+ %arrayidx1 = call i32* @get_unknown_pointer()
+ br label %endif
+
+endif:
+ %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
+ store i32 0, i32* %phi.ptr, align 4
+ ret void
+}
+
+; kernel void ptr_induction_var_same_alloca(void)
+; {
+; int alloca[64];
+; int i = 0;
+
+; #pragma nounroll
+; for (int* p = &alloca[2], *e = &alloca[48]; p != e; ++p, ++i)
+; {
+; *p = i;
+; }
+; }
+
+; FIXME: This should be promotable. We need to use
+; GetUnderlyingObjects when looking at the icmp user.
+
+; CHECK-LABEL: @ptr_induction_var_same_alloca(
+; CHECK: %alloca = alloca [64 x i32], align 4
+; CHECK: phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
+define void @ptr_induction_var_same_alloca() #0 {
+entry:
+ %alloca = alloca [64 x i32], align 4
+ %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2
+ %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 48
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %p.08 = phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
+ store i32 %i.09, i32* %p.08, align 4
+ %incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1
+ %inc = add nuw nsw i32 %i.09, 1
+ %cmp = icmp eq i32* %incdec.ptr, %arrayidx1
+ br i1 %cmp, label %for.cond.cleanup, label %for.body
+}
+
+
+; extern int* get_unknown_pointer(void);
+
+; kernel void ptr_induction_var_alloca_unknown(void)
+; {
+; int alloca[64];
+; int i = 0;
+;
+; for (int* p = &alloca[2], *e = get_unknown_pointer(); p != e; ++p, ++i)
+; {
+; *p = i;
+; }
+; }
+
+; CHECK-LABEL: @ptr_induction_var_alloca_unknown(
+; CHECK: %alloca = alloca [64 x i32], align 4
+; CHECK: %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
+; CHECK: %cmp = icmp eq i32* %incdec.ptr, %call
+define void @ptr_induction_var_alloca_unknown() #0 {
+entry:
+ %alloca = alloca [64 x i32], align 4
+ %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2
+ %call = tail call i32* @get_unknown_pointer() #2
+ %cmp.7 = icmp eq i32* %arrayidx, %call
+ br i1 %cmp.7, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ ret void
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %i.09 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
+ store i32 %i.09, i32* %p.08, align 4
+ %incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1
+ %inc = add nuw nsw i32 %i.09, 1
+ %cmp = icmp eq i32* %incdec.ptr, %call
+ br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+declare i32* @get_unknown_pointer() #0
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
new file mode 100644
index 00000000000..1446acf8484
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
@@ -0,0 +1,102 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
+
+; CHECK-LABEL: @lds_promoted_alloca_select_invalid_pointer_operand(
+; CHECK: %alloca = alloca i32
+; CHECK: select i1 undef, i32* undef, i32* %alloca
+define void @lds_promoted_alloca_select_invalid_pointer_operand() #0 {
+ %alloca = alloca i32, align 4
+ %select = select i1 undef, i32* undef, i32* %alloca
+ store i32 0, i32* %select, align 4
+ ret void
+}
+
+; CHECK-LABEL: @lds_promote_alloca_select_two_derived_pointers(
+; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promote_alloca_select_two_derived_pointers.alloca, i32 0, i32 %{{[0-9]+}}
+; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
+; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %b
+; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
+; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
+define void @lds_promote_alloca_select_two_derived_pointers(i32 %a, i32 %b) #0 {
+ %alloca = alloca [16 x i32], align 4
+ %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
+ %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
+ %select = select i1 undef, i32* %ptr0, i32* %ptr1
+ store i32 0, i32* %select, align 4
+ ret void
+}
+
+; FIXME: This should be promotable but requires knowing that both will be promoted first.
+
+; CHECK-LABEL: @lds_promote_alloca_select_two_allocas(
+; CHECK: %alloca0 = alloca i32, i32 16, align 4
+; CHECK: %alloca1 = alloca i32, i32 16, align 4
+; CHECK: %ptr0 = getelementptr inbounds i32, i32* %alloca0, i32 %a
+; CHECK: %ptr1 = getelementptr inbounds i32, i32* %alloca1, i32 %b
+; CHECK: %select = select i1 undef, i32* %ptr0, i32* %ptr1
+define void @lds_promote_alloca_select_two_allocas(i32 %a, i32 %b) #0 {
+ %alloca0 = alloca i32, i32 16, align 4
+ %alloca1 = alloca i32, i32 16, align 4
+ %ptr0 = getelementptr inbounds i32, i32* %alloca0, i32 %a
+ %ptr1 = getelementptr inbounds i32, i32* %alloca1, i32 %b
+ %select = select i1 undef, i32* %ptr0, i32* %ptr1
+ store i32 0, i32* %select, align 4
+ ret void
+}
+
+; TODO: Maybe this should be canonicalized to select on the constant and GEP after.
+; CHECK-LABEL: @lds_promote_alloca_select_two_derived_constant_pointers(
+; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promote_alloca_select_two_derived_constant_pointers.alloca, i32 0, i32 %{{[0-9]+}}
+; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 1
+; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 3
+; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
+; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
+define void @lds_promote_alloca_select_two_derived_constant_pointers() #0 {
+ %alloca = alloca [16 x i32], align 4
+ %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 1
+ %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 3
+ %select = select i1 undef, i32* %ptr0, i32* %ptr1
+ store i32 0, i32* %select, align 4
+ ret void
+}
+
+; CHECK-LABEL: @lds_promoted_alloca_select_input_select(
+; CHECK: getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_select_input_select.alloca, i32 0, i32 %{{[0-9]+}}
+; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
+; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %b
+; CHECK: %ptr2 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %c
+; CHECK: %select0 = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
+; CHECK: %select1 = select i1 undef, i32 addrspace(3)* %select0, i32 addrspace(3)* %ptr2
+; CHECK: store i32 0, i32 addrspace(3)* %select1, align 4
+define void @lds_promoted_alloca_select_input_select(i32 %a, i32 %b, i32 %c) #0 {
+ %alloca = alloca [16 x i32], align 4
+ %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
+ %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
+ %ptr2 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %c
+ %select0 = select i1 undef, i32* %ptr0, i32* %ptr1
+ %select1 = select i1 undef, i32* %select0, i32* %ptr2
+ store i32 0, i32* %select1, align 4
+ ret void
+}
+
+; A select (%select1) consumes a phi whose incoming values are a GEP of the
+; alloca and another select (%select0) that has an undef pointer operand.
+; NOTE(review): this function has no CHECK lines, so FileCheck verifies
+; nothing about its output; presumably it only guards against a crash in the
+; pass -- confirm, and consider adding a CHECK-LABEL with the expected result.
+define void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c) #0 {
+entry:
+ %alloca = alloca [16 x i32], align 4
+ %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
+ %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
+ store i32 0, i32* %ptr0
+ br i1 undef, label %bb1, label %bb2
+
+bb1:
+ %ptr2 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %c
+ %select0 = select i1 undef, i32* undef, i32* %ptr2
+ store i32 0, i32* %ptr1
+ br label %bb2
+
+bb2:
+ %phi.ptr = phi i32* [ %ptr0, %entry ], [ %select0, %bb1 ]
+ %select1 = select i1 undef, i32* %phi.ptr, i32* %ptr1
+ store i32 0, i32* %select1, align 4
+ ret void
+}
+
+attributes #0 = { norecurse nounwind }
OpenPOWER on IntegriCloud