summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h 6
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 10
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll 19
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/X86/aggregate.ll 19
4 files changed, 49 insertions, 5 deletions
diff --git a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index 06f6fa11a94..25f264c4722 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -96,11 +96,13 @@ private:
/// \brief Try to vectorize a list of operands.
/// \@param BuildVector A list of users to ignore for the purpose of
- /// scheduling and that don't need extracting.
+ /// scheduling and cost estimation when NeedExtraction
+ /// is false.
/// \returns true if a value was vectorized.
bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R,
ArrayRef<Value *> BuildVector = None,
- bool AllowReorder = false);
+ bool AllowReorder = false,
+ bool NeedExtraction = false);
/// \brief Try to vectorize a chain that may start at the operands of \p I.
bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9b35f35e870..76ba62f5d59 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4533,7 +4533,8 @@ bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
ArrayRef<Value *> BuildVector,
- bool AllowReorder) {
+ bool AllowReorder,
+ bool NeedExtraction) {
if (VL.size() < 2)
return false;
@@ -4627,11 +4628,12 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
<< "\n");
ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
+ ArrayRef<Value *> EmptyArray;
ArrayRef<Value *> BuildVectorSlice;
if (!BuildVector.empty())
BuildVectorSlice = BuildVector.slice(I, OpsWidth);
- R.buildTree(Ops, BuildVectorSlice);
+ R.buildTree(Ops, NeedExtraction ? EmptyArray : BuildVectorSlice);
// TODO: check if we can allow reordering for more cases.
if (AllowReorder && R.shouldReorder()) {
// Conceptually, there is nothing actually preventing us from trying to
@@ -5821,7 +5823,9 @@ bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
return false;
DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
- return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false);
+ // An aggregate value is unlikely to be processed in a vector register; we
+ // need to extract scalars into scalar registers, so NeedExtraction is true.
+ return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false, true);
}
bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
diff --git a/llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll b/llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll
new file mode 100644
index 00000000000..015c1f1ed8f
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -mtriple=powerpc64-linux-gnu -mcpu=pwr9 -mattr=+vsx -slp-vectorizer < %s | FileCheck %s
+
+%struct.S = type { i8*, i8* }
+
+@kS0 = common global %struct.S zeroinitializer, align 8
+
+define { i64, i64 } @getS() {
+entry:
+ %0 = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8
+ %1 = load i64, i64* bitcast (i8** getelementptr inbounds (%struct.S, %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
+ %2 = insertvalue { i64, i64 } undef, i64 %0, 0
+ %3 = insertvalue { i64, i64 } %2, i64 %1, 1
+ ret { i64, i64 } %3
+}
+
+; CHECK: load i64
+; CHECK-NOT: load <2 x i64>
+; CHECK-NOT: extractelement
+
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/aggregate.ll b/llvm/test/Transforms/SLPVectorizer/X86/aggregate.ll
new file mode 100644
index 00000000000..350929dc539
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/aggregate.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -mtriple=x86_64-unknown-linux -mcpu=corei7 -slp-vectorizer < %s | FileCheck %s
+
+%struct.S = type { i8*, i8* }
+
+@kS0 = common global %struct.S zeroinitializer, align 8
+
+define { i64, i64 } @getS() {
+entry:
+ %0 = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8
+ %1 = load i64, i64* bitcast (i8** getelementptr inbounds (%struct.S, %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
+ %2 = insertvalue { i64, i64 } undef, i64 %0, 0
+ %3 = insertvalue { i64, i64 } %2, i64 %1, 1
+ ret { i64, i64 } %3
+}
+
+; CHECK: load i64
+; CHECK-NOT: load <2 x i64>
+; CHECK-NOT: extractelement
+
OpenPOWER on IntegriCloud