Diffstat (limited to 'llvm')
-rw-r--r--  llvm/include/llvm/Transforms/Utils/VectorUtils.h    |  15
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp     |  15
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp     |  29
-rw-r--r--  llvm/test/Transforms/LoopVectorize/intrinsic.ll     | 102
-rw-r--r--  llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll | 267
5 files changed, 426 insertions, 2 deletions
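
This change teaches the loop vectorizer and the SLP vectorizer to handle the
ctlz, cttz, and powi intrinsics. Their second operand (the i1 is_zero_undef
flag for ctlz/cttz, the i32 exponent for powi) must remain scalar when the
call is widened. A minimal sketch of the intended transformation, assuming a
vectorization factor of 4 (the value names are illustrative, not part of the
patch):

    ; before: a scalar call in each iteration, always with the same exponent %P
    %r = call double @llvm.powi.f64(double %x, i32 %P)
    ; after: one wide call; the first operand is widened, the exponent stays scalar
    %rv = call <4 x double> @llvm.powi.v4f64(<4 x double> %xv, i32 %P)
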
diff --git a/llvm/include/llvm/Transforms/Utils/VectorUtils.h b/llvm/include/llvm/Transforms/Utils/VectorUtils.h
index e1d6c562923..44a7149eee9 100644
--- a/llvm/include/llvm/Transforms/Utils/VectorUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/VectorUtils.h
@@ -48,12 +48,27 @@ static inline bool isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ case Intrinsic::powi:
return true;
default:
return false;
}
}
+static bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
+ unsigned ScalarOpdIdx) {
+ switch (ID) {
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ case Intrinsic::powi:
+ return (ScalarOpdIdx == 1);
+ default:
+ return false;
+ }
+}
+
static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I,
Intrinsic::ID ValidIntrinsicID) {
if (I.getNumArgOperands() != 1 ||
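
The new hasVectorInstrinsicScalarOpd predicate reports whether the operand at
ScalarOpdIdx of a call to the given intrinsic must stay scalar when the call
is vectorized; for the three intrinsics added here that operand is index 1.
A sketch of the scalar and widened signatures involved (vector forms shown
for a factor of 4, for illustration only):

    declare i32 @llvm.ctlz.i32(i32, i1)                       ; operand 1: i1 is_zero_undef flag
    declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)         ; the flag remains a scalar i1
    declare double @llvm.powi.f64(double, i32)                ; operand 1: i32 exponent
    declare <4 x double> @llvm.powi.v4f64(<4 x double>, i32)  ; the exponent remains a scalar i32
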
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ba2b7eea363..15d4c1c79d2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3123,9 +3123,14 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
scalarizeInstruction(it);
break;
default:
+ bool HasScalarOpd = hasVectorInstrinsicScalarOpd(ID, 1);
for (unsigned Part = 0; Part < UF; ++Part) {
SmallVector<Value *, 4> Args;
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ if (HasScalarOpd && i == 1) {
+ Args.push_back(CI->getArgOperand(i));
+ continue;
+ }
VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
Args.push_back(Arg[Part]);
}
@@ -3474,6 +3479,16 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return false;
}
+ // Intrinsics such as powi, cttz, and ctlz are legal to vectorize if the
+ // second argument is the same on every iteration (i.e., loop invariant).
+ if (CI &&
+ hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) {
+ if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) {
+ DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
+ return false;
+ }
+ }
+
// Check that the instruction return type is vectorizable.
// Also, we can't vectorize extractelement instructions.
if ((!VectorType::isValidElementType(it->getType()) &&
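
Two pieces cooperate here: vectorizeBlockInLoop passes the scalar operand
through unchanged when widening the call, and canVectorizeInstrs only admits
the call when ScalarEvolution proves that operand is loop invariant. A sketch
of the two cases the legality check distinguishes (illustrative only):

    ; accepted: %P is the same value on every iteration
    %c0 = call double @llvm.powi.f64(double %v, i32 %P)
    ; rejected: the exponent varies with the induction variable
    %iv32 = trunc i64 %indvars.iv to i32
    %c1 = call double @llvm.powi.f64(double %v, i32 %iv32)

The powi_f64 and powi_f64_neg tests below exercise exactly these two cases.
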
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e13ba956c39..ce0a009e1cb 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -961,9 +961,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;
}
-
Function *Int = CI->getCalledFunction();
-
+ Value *A1I = nullptr;
+ if (hasVectorInstrinsicScalarOpd(ID, 1))
+ A1I = CI->getArgOperand(1);
for (unsigned i = 1, e = VL.size(); i != e; ++i) {
CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
if (!CI2 || CI2->getCalledFunction() != Int ||
@@ -973,6 +974,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
<< "\n");
return;
}
+ // ctlz, cttz, and powi are special intrinsics whose second argument
+ // must be the same for them to be vectorized.
+ if (hasVectorInstrinsicScalarOpd(ID, 1)) {
+ Value *A1J = CI2->getArgOperand(1);
+ if (A1I != A1J) {
+ newTreeEntry(VL, false);
+ DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
+ << " argument " << A1I << " != " << A1J
+ << "\n");
+ return;
+ }
+ }
}
newTreeEntry(VL, true);
@@ -1652,9 +1665,21 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
setInsertPointAfterBundle(E->Scalars);
+ Function *FI;
+ Intrinsic::ID IID = Intrinsic::not_intrinsic;
+ if (CI && (FI = CI->getCalledFunction())) {
+ IID = (Intrinsic::ID) FI->getIntrinsicID();
+ }
std::vector<Value *> OpVecs;
for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
ValueList OpVL;
+ // ctlz, cttz, and powi are special intrinsics whose second argument is
+ // a scalar. This argument should not be vectorized.
+ if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) {
+ CallInst *CEI = cast<CallInst>(E->Scalars[0]);
+ OpVecs.push_back(CEI->getArgOperand(j));
+ continue;
+ }
for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
CallInst *CEI = cast<CallInst>(E->Scalars[i]);
OpVL.push_back(CEI->getArgOperand(j));
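
On the SLP side the same rule appears twice: buildTree_rec refuses to bundle
calls whose scalar operands differ, and vectorizeTree takes the scalar operand
from the first call in the bundle instead of building a vector of operands for
it. A sketch of the result for a bundle of ctlz calls (factor 4, names
illustrative):

    ; four scalar calls, all with the same i1 flag ...
    %c1 = call i32 @llvm.ctlz.i32(i32 %a1, i1 true)
    ; ... collapse into one wide call; the flag is copied from the first call
    %cv = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %av, i1 true)
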
diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
index c3d570c03a7..7dfaf03b0f2 100644
--- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
@@ -1090,3 +1090,105 @@ for.end: ; preds = %for.body
ret void
}
+declare double @llvm.powi.f64(double %Val, i32 %power) nounwind readnone
+
+;CHECK-LABEL: @powi_f64(
+;CHECK: llvm.powi.v4f64
+;CHECK: ret void
+define void @powi_f64(i32 %n, double* noalias %y, double* noalias %x, i32 %P) nounwind uwtable {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8
+ %call = tail call double @llvm.powi.f64(double %0, i32 %P) nounwind readnone
+ %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx4, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+;CHECK-LABEL: @powi_f64_neg(
+;CHECK-NOT: llvm.powi.v4f64
+;CHECK: ret void
+define void @powi_f64_neg(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8
+ %1 = trunc i64 %indvars.iv to i32
+ %call = tail call double @llvm.powi.f64(double %0, i32 %1) nounwind readnone
+ %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
+ store double %call, double* %arrayidx4, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare i64 @llvm.cttz.i64 (i64, i1) nounwind readnone
+
+;CHECK-LABEL: @cttz_f64(
+;CHECK: llvm.cttz.v4i64
+;CHECK: ret void
+define void @cttz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv
+ %0 = load i64* %arrayidx, align 8
+ %call = tail call i64 @llvm.cttz.i64(i64 %0, i1 true) nounwind readnone
+ %arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv
+ store i64 %call, i64* %arrayidx4, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare i64 @llvm.ctlz.i64 (i64, i1) nounwind readnone
+
+;CHECK-LABEL: @ctlz_f64(
+;CHECK: llvm.ctlz.v4i64
+;CHECK: ret void
+define void @ctlz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv
+ %0 = load i64* %arrayidx, align 8
+ %call = tail call i64 @llvm.ctlz.i64(i64 %0, i1 true) nounwind readnone
+ %arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv
+ store i64 %call, i64* %arrayidx4, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll b/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll
index 30c50936973..937252f4146 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll
@@ -117,3 +117,270 @@ entry:
; CHECK: store <4 x i32>
; CHECK: ret
}
+
+declare i32 @llvm.ctlz.i32(i32,i1) nounwind readnone
+
+define void @vec_ctlz_i32(i32* %a, i32* %b, i32* %c, i1) {
+entry:
+ %i0 = load i32* %a, align 4
+ %i1 = load i32* %b, align 4
+ %add1 = add i32 %i0, %i1
+ %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
+
+ %arrayidx2 = getelementptr inbounds i32* %a, i32 1
+ %i2 = load i32* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i32 1
+ %i3 = load i32* %arrayidx3, align 4
+ %add2 = add i32 %i2, %i3
+ %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 true) nounwind readnone
+
+ %arrayidx4 = getelementptr inbounds i32* %a, i32 2
+ %i4 = load i32* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds i32* %b, i32 2
+ %i5 = load i32* %arrayidx5, align 4
+ %add3 = add i32 %i4, %i5
+ %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
+
+ %arrayidx6 = getelementptr inbounds i32* %a, i32 3
+ %i6 = load i32* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds i32* %b, i32 3
+ %i7 = load i32* %arrayidx7, align 4
+ %add4 = add i32 %i6, %i7
+ %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 true) nounwind readnone
+
+ store i32 %call1, i32* %c, align 4
+ %arrayidx8 = getelementptr inbounds i32* %c, i32 1
+ store i32 %call2, i32* %arrayidx8, align 4
+ %arrayidx9 = getelementptr inbounds i32* %c, i32 2
+ store i32 %call3, i32* %arrayidx9, align 4
+ %arrayidx10 = getelementptr inbounds i32* %c, i32 3
+ store i32 %call4, i32* %arrayidx10, align 4
+ ret void
+
+; CHECK-LABEL: @vec_ctlz_i32(
+; CHECK: load <4 x i32>
+; CHECK: load <4 x i32>
+; CHECK: call <4 x i32> @llvm.ctlz.v4i32
+; CHECK: store <4 x i32>
+; CHECK: ret
+}
+
+define void @vec_ctlz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
+entry:
+ %i0 = load i32* %a, align 4
+ %i1 = load i32* %b, align 4
+ %add1 = add i32 %i0, %i1
+ %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
+
+ %arrayidx2 = getelementptr inbounds i32* %a, i32 1
+ %i2 = load i32* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i32 1
+ %i3 = load i32* %arrayidx3, align 4
+ %add2 = add i32 %i2, %i3
+ %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 false) nounwind readnone
+
+ %arrayidx4 = getelementptr inbounds i32* %a, i32 2
+ %i4 = load i32* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds i32* %b, i32 2
+ %i5 = load i32* %arrayidx5, align 4
+ %add3 = add i32 %i4, %i5
+ %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
+
+ %arrayidx6 = getelementptr inbounds i32* %a, i32 3
+ %i6 = load i32* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds i32* %b, i32 3
+ %i7 = load i32* %arrayidx7, align 4
+ %add4 = add i32 %i6, %i7
+ %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 false) nounwind readnone
+
+ store i32 %call1, i32* %c, align 4
+ %arrayidx8 = getelementptr inbounds i32* %c, i32 1
+ store i32 %call2, i32* %arrayidx8, align 4
+ %arrayidx9 = getelementptr inbounds i32* %c, i32 2
+ store i32 %call3, i32* %arrayidx9, align 4
+ %arrayidx10 = getelementptr inbounds i32* %c, i32 3
+ store i32 %call4, i32* %arrayidx10, align 4
+ ret void
+
+; CHECK-LABEL: @vec_ctlz_i32_neg(
+; CHECK-NOT: call <4 x i32> @llvm.ctlz.v4i32
+
+}
+
+
+declare i32 @llvm.cttz.i32(i32,i1) nounwind readnone
+
+define void @vec_cttz_i32(i32* %a, i32* %b, i32* %c, i1) {
+entry:
+ %i0 = load i32* %a, align 4
+ %i1 = load i32* %b, align 4
+ %add1 = add i32 %i0, %i1
+ %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
+
+ %arrayidx2 = getelementptr inbounds i32* %a, i32 1
+ %i2 = load i32* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i32 1
+ %i3 = load i32* %arrayidx3, align 4
+ %add2 = add i32 %i2, %i3
+ %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 true) nounwind readnone
+
+ %arrayidx4 = getelementptr inbounds i32* %a, i32 2
+ %i4 = load i32* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds i32* %b, i32 2
+ %i5 = load i32* %arrayidx5, align 4
+ %add3 = add i32 %i4, %i5
+ %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
+
+ %arrayidx6 = getelementptr inbounds i32* %a, i32 3
+ %i6 = load i32* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds i32* %b, i32 3
+ %i7 = load i32* %arrayidx7, align 4
+ %add4 = add i32 %i6, %i7
+ %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 true) nounwind readnone
+
+ store i32 %call1, i32* %c, align 4
+ %arrayidx8 = getelementptr inbounds i32* %c, i32 1
+ store i32 %call2, i32* %arrayidx8, align 4
+ %arrayidx9 = getelementptr inbounds i32* %c, i32 2
+ store i32 %call3, i32* %arrayidx9, align 4
+ %arrayidx10 = getelementptr inbounds i32* %c, i32 3
+ store i32 %call4, i32* %arrayidx10, align 4
+ ret void
+
+; CHECK-LABEL: @vec_cttz_i32(
+; CHECK: load <4 x i32>
+; CHECK: load <4 x i32>
+; CHECK: call <4 x i32> @llvm.cttz.v4i32
+; CHECK: store <4 x i32>
+; CHECK: ret
+}
+
+define void @vec_cttz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
+entry:
+ %i0 = load i32* %a, align 4
+ %i1 = load i32* %b, align 4
+ %add1 = add i32 %i0, %i1
+ %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
+
+ %arrayidx2 = getelementptr inbounds i32* %a, i32 1
+ %i2 = load i32* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i32 1
+ %i3 = load i32* %arrayidx3, align 4
+ %add2 = add i32 %i2, %i3
+ %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 false) nounwind readnone
+
+ %arrayidx4 = getelementptr inbounds i32* %a, i32 2
+ %i4 = load i32* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds i32* %b, i32 2
+ %i5 = load i32* %arrayidx5, align 4
+ %add3 = add i32 %i4, %i5
+ %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
+
+ %arrayidx6 = getelementptr inbounds i32* %a, i32 3
+ %i6 = load i32* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds i32* %b, i32 3
+ %i7 = load i32* %arrayidx7, align 4
+ %add4 = add i32 %i6, %i7
+ %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 false) nounwind readnone
+
+ store i32 %call1, i32* %c, align 4
+ %arrayidx8 = getelementptr inbounds i32* %c, i32 1
+ store i32 %call2, i32* %arrayidx8, align 4
+ %arrayidx9 = getelementptr inbounds i32* %c, i32 2
+ store i32 %call3, i32* %arrayidx9, align 4
+ %arrayidx10 = getelementptr inbounds i32* %c, i32 3
+ store i32 %call4, i32* %arrayidx10, align 4
+ ret void
+
+; CHECK-LABEL: @vec_cttz_i32_neg(
+; CHECK-NOT: call <4 x i32> @llvm.cttz.v4i32
+}
+
+
+declare float @llvm.powi.f32(float, i32)
+define void @vec_powi_f32(float* %a, float* %b, float* %c, i32 %P) {
+entry:
+ %i0 = load float* %a, align 4
+ %i1 = load float* %b, align 4
+ %add1 = fadd float %i0, %i1
+ %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone
+
+ %arrayidx2 = getelementptr inbounds float* %a, i32 1
+ %i2 = load float* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds float* %b, i32 1
+ %i3 = load float* %arrayidx3, align 4
+ %add2 = fadd float %i2, %i3
+ %call2 = tail call float @llvm.powi.f32(float %add2,i32 %P) nounwind readnone
+
+ %arrayidx4 = getelementptr inbounds float* %a, i32 2
+ %i4 = load float* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds float* %b, i32 2
+ %i5 = load float* %arrayidx5, align 4
+ %add3 = fadd float %i4, %i5
+ %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone
+
+ %arrayidx6 = getelementptr inbounds float* %a, i32 3
+ %i6 = load float* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds float* %b, i32 3
+ %i7 = load float* %arrayidx7, align 4
+ %add4 = fadd float %i6, %i7
+ %call4 = tail call float @llvm.powi.f32(float %add4,i32 %P) nounwind readnone
+
+ store float %call1, float* %c, align 4
+ %arrayidx8 = getelementptr inbounds float* %c, i32 1
+ store float %call2, float* %arrayidx8, align 4
+ %arrayidx9 = getelementptr inbounds float* %c, i32 2
+ store float %call3, float* %arrayidx9, align 4
+ %arrayidx10 = getelementptr inbounds float* %c, i32 3
+ store float %call4, float* %arrayidx10, align 4
+ ret void
+
+; CHECK-LABEL: @vec_powi_f32(
+; CHECK: load <4 x float>
+; CHECK: load <4 x float>
+; CHECK: call <4 x float> @llvm.powi.v4f32
+; CHECK: store <4 x float>
+; CHECK: ret
+}
+
+
+define void @vec_powi_f32_neg(float* %a, float* %b, float* %c, i32 %P, i32 %Q) {
+entry:
+ %i0 = load float* %a, align 4
+ %i1 = load float* %b, align 4
+ %add1 = fadd float %i0, %i1
+ %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone
+
+ %arrayidx2 = getelementptr inbounds float* %a, i32 1
+ %i2 = load float* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds float* %b, i32 1
+ %i3 = load float* %arrayidx3, align 4
+ %add2 = fadd float %i2, %i3
+ %call2 = tail call float @llvm.powi.f32(float %add2,i32 %Q) nounwind readnone
+
+ %arrayidx4 = getelementptr inbounds float* %a, i32 2
+ %i4 = load float* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds float* %b, i32 2
+ %i5 = load float* %arrayidx5, align 4
+ %add3 = fadd float %i4, %i5
+ %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone
+
+ %arrayidx6 = getelementptr inbounds float* %a, i32 3
+ %i6 = load float* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds float* %b, i32 3
+ %i7 = load float* %arrayidx7, align 4
+ %add4 = fadd float %i6, %i7
+ %call4 = tail call float @llvm.powi.f32(float %add4,i32 %Q) nounwind readnone
+
+ store float %call1, float* %c, align 4
+ %arrayidx8 = getelementptr inbounds float* %c, i32 1
+ store float %call2, float* %arrayidx8, align 4
+ %arrayidx9 = getelementptr inbounds float* %c, i32 2
+ store float %call3, float* %arrayidx9, align 4
+ %arrayidx10 = getelementptr inbounds float* %c, i32 3
+ store float %call4, float* %arrayidx10, align 4
+ ret void
+
+; CHECK-LABEL: @vec_powi_f32_neg(
+; CHECK-NOT: call <4 x float> @llvm.powi.v4f32
+}