summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 14
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll 108
2 files changed, 117 insertions, 5 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3592df3ede3..a1c642e7344 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5453,7 +5453,7 @@ class HorizontalReduction {
}
};
- Instruction *ReductionRoot = nullptr;
+ WeakTrackingVH ReductionRoot;
/// The operation data of the reduction operation.
OperationData ReductionData;
@@ -5738,7 +5738,7 @@ public:
unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
Value *VectorizedTree = nullptr;
- IRBuilder<> Builder(ReductionRoot);
+ IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
FastMathFlags Unsafe;
Unsafe.setFast();
Builder.setFastMathFlags(Unsafe);
@@ -5747,8 +5747,13 @@ public:
BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
// The same extra argument may be used several times, so log each attempt
// to use it.
- for (auto &Pair : ExtraArgs)
+ for (auto &Pair : ExtraArgs) {
+ assert(Pair.first && "DebugLoc must be set.");
ExternallyUsedValues[Pair.second].push_back(Pair.first);
+ }
+ // The reduction root is used as the insertion point for new instructions,
+ // so set it as externally used to prevent it from being deleted.
+ ExternallyUsedValues[ReductionRoot];
SmallVector<Value *, 16> IgnoreList;
for (auto &V : ReductionOps)
IgnoreList.append(V.begin(), V.end());
@@ -5800,6 +5805,7 @@ public:
Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
// Emit a reduction.
+ Builder.SetInsertPoint(cast<Instruction>(ReductionRoot));
Value *ReducedSubTree =
emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
if (VectorizedTree) {
@@ -5826,8 +5832,6 @@ public:
VectorizedTree = VectReductionData.createOp(Builder, "", ReductionOps);
}
for (auto &Pair : ExternallyUsedValues) {
- assert(!Pair.second.empty() &&
- "At least one DebugLoc must be inserted");
// Add each externally used value to the final reduction.
for (auto *I : Pair.second) {
Builder.SetCurrentDebugLocation(I->getDebugLoc());
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
new file mode 100644
index 00000000000..3ebccb04d07
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake -slp-threshold=-7 | FileCheck %s
+
+define void @Test(i32) {
+; CHECK-LABEL: @Test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[LOCAL_4_39_US:%.*]] = phi i32 [ [[VAL_42:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[LOCAL_8_43_US:%.*]] = phi i32 [ [[VAL_43:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[VAL_0:%.*]] = add i32 [[LOCAL_4_39_US]], 0
+; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[LOCAL_8_43_US]], [[VAL_0]]
+; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]]
+; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]]
+; CHECK-NEXT: [[VAL_4:%.*]] = and i32 [[VAL_3]], [[TMP0]]
+; CHECK-NEXT: [[VAL_5:%.*]] = and i32 [[VAL_4]], [[TMP0]]
+; CHECK-NEXT: [[VAL_6:%.*]] = add i32 [[LOCAL_8_43_US]], 55
+; CHECK-NEXT: [[VAL_7:%.*]] = and i32 [[VAL_5]], [[VAL_6]]
+; CHECK-NEXT: [[VAL_8:%.*]] = and i32 [[VAL_7]], [[TMP0]]
+; CHECK-NEXT: [[VAL_9:%.*]] = and i32 [[VAL_8]], [[TMP0]]
+; CHECK-NEXT: [[VAL_10:%.*]] = and i32 [[VAL_9]], [[TMP0]]
+; CHECK-NEXT: [[VAL_11:%.*]] = add i32 [[LOCAL_8_43_US]], 285
+; CHECK-NEXT: [[VAL_12:%.*]] = and i32 [[VAL_10]], [[VAL_11]]
+; CHECK-NEXT: [[VAL_13:%.*]] = and i32 [[VAL_12]], [[TMP0]]
+; CHECK-NEXT: [[VAL_14:%.*]] = and i32 [[VAL_13]], [[TMP0]]
+; CHECK-NEXT: [[VAL_15:%.*]] = and i32 [[VAL_14]], [[TMP0]]
+; CHECK-NEXT: [[VAL_16:%.*]] = and i32 [[VAL_15]], [[TMP0]]
+; CHECK-NEXT: [[VAL_17:%.*]] = and i32 [[VAL_16]], [[TMP0]]
+; CHECK-NEXT: [[VAL_18:%.*]] = add i32 [[LOCAL_8_43_US]], 1240
+; CHECK-NEXT: [[VAL_19:%.*]] = and i32 [[VAL_17]], [[VAL_18]]
+; CHECK-NEXT: [[VAL_20:%.*]] = add i32 [[LOCAL_8_43_US]], 1496
+; CHECK-NEXT: [[VAL_21:%.*]] = and i32 [[VAL_19]], [[VAL_20]]
+; CHECK-NEXT: [[VAL_22:%.*]] = and i32 [[VAL_21]], [[TMP0]]
+; CHECK-NEXT: [[VAL_23:%.*]] = and i32 [[VAL_22]], [[TMP0]]
+; CHECK-NEXT: [[VAL_24:%.*]] = and i32 [[VAL_23]], [[TMP0]]
+; CHECK-NEXT: [[VAL_25:%.*]] = and i32 [[VAL_24]], [[TMP0]]
+; CHECK-NEXT: [[VAL_26:%.*]] = and i32 [[VAL_25]], [[TMP0]]
+; CHECK-NEXT: [[VAL_27:%.*]] = and i32 [[VAL_26]], [[TMP0]]
+; CHECK-NEXT: [[VAL_28:%.*]] = and i32 [[VAL_27]], [[TMP0]]
+; CHECK-NEXT: [[VAL_29:%.*]] = and i32 [[VAL_28]], [[TMP0]]
+; CHECK-NEXT: [[VAL_30:%.*]] = and i32 [[VAL_29]], [[TMP0]]
+; CHECK-NEXT: [[VAL_31:%.*]] = and i32 [[VAL_30]], [[TMP0]]
+; CHECK-NEXT: [[VAL_32:%.*]] = and i32 [[VAL_31]], [[TMP0]]
+; CHECK-NEXT: [[VAL_33:%.*]] = and i32 [[VAL_32]], [[TMP0]]
+; CHECK-NEXT: [[VAL_34:%.*]] = add i32 [[LOCAL_8_43_US]], 8555
+; CHECK-NEXT: [[VAL_35:%.*]] = and i32 [[VAL_33]], [[VAL_34]]
+; CHECK-NEXT: [[VAL_36:%.*]] = and i32 [[VAL_35]], [[TMP0]]
+; CHECK-NEXT: [[VAL_37:%.*]] = and i32 [[VAL_36]], [[TMP0]]
+; CHECK-NEXT: [[VAL_38:%.*]] = and i32 [[VAL_37]], [[TMP0]]
+; CHECK-NEXT: [[VAL_39:%.*]] = add i32 [[LOCAL_8_43_US]], 12529
+; CHECK-NEXT: [[VAL_40:%.*]] = and i32 [[VAL_38]], [[VAL_39]]
+; CHECK-NEXT: [[VAL_41:%.*]] = add i32 [[LOCAL_8_43_US]], 13685
+; CHECK-NEXT: [[VAL_42]] = and i32 [[VAL_40]], [[VAL_41]]
+; CHECK-NEXT: [[VAL_43]] = add i32 [[LOCAL_8_43_US]], 14910
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ %local_4_39.us = phi i32 [ %val_42, %loop ], [ 0, %entry ]
+ %local_8_43.us = phi i32 [ %val_43, %loop ], [ 0, %entry ]
+ %val_0 = add i32 %local_4_39.us, 0
+ %val_1 = and i32 %local_8_43.us, %val_0
+ %val_2 = and i32 %val_1, %0
+ %val_3 = and i32 %val_2, %0
+ %val_4 = and i32 %val_3, %0
+ %val_5 = and i32 %val_4, %0
+ %val_6 = add i32 %local_8_43.us, 55
+ %val_7 = and i32 %val_5, %val_6
+ %val_8 = and i32 %val_7, %0
+ %val_9 = and i32 %val_8, %0
+ %val_10 = and i32 %val_9, %0
+ %val_11 = add i32 %local_8_43.us, 285
+ %val_12 = and i32 %val_10, %val_11
+ %val_13 = and i32 %val_12, %0
+ %val_14 = and i32 %val_13, %0
+ %val_15 = and i32 %val_14, %0
+ %val_16 = and i32 %val_15, %0
+ %val_17 = and i32 %val_16, %0
+ %val_18 = add i32 %local_8_43.us, 1240
+ %val_19 = and i32 %val_17, %val_18
+ %val_20 = add i32 %local_8_43.us, 1496
+ %val_21 = and i32 %val_19, %val_20
+ %val_22 = and i32 %val_21, %0
+ %val_23 = and i32 %val_22, %0
+ %val_24 = and i32 %val_23, %0
+ %val_25 = and i32 %val_24, %0
+ %val_26 = and i32 %val_25, %0
+ %val_27 = and i32 %val_26, %0
+ %val_28 = and i32 %val_27, %0
+ %val_29 = and i32 %val_28, %0
+ %val_30 = and i32 %val_29, %0
+ %val_31 = and i32 %val_30, %0
+ %val_32 = and i32 %val_31, %0
+ %val_33 = and i32 %val_32, %0
+ %val_34 = add i32 %local_8_43.us, 8555
+ %val_35 = and i32 %val_33, %val_34
+ %val_36 = and i32 %val_35, %0
+ %val_37 = and i32 %val_36, %0
+ %val_38 = and i32 %val_37, %0
+ %val_39 = add i32 %local_8_43.us, 12529
+ %val_40 = and i32 %val_38, %val_39
+ %val_41 = add i32 %local_8_43.us, 13685
+ %val_42 = and i32 %val_40, %val_41
+ %val_43 = add i32 %local_8_43.us, 14910
+ br label %loop
+}
OpenPOWER on IntegriCloud