summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
author: Elena Demikhovsky <elena.demikhovsky@intel.com> 2016-04-26 20:18:04 +0000
committer: Elena Demikhovsky <elena.demikhovsky@intel.com> 2016-04-26 20:18:04 +0000
commit: 308a7eb0d23099e14bd03ed58358d6ddfe850315 (patch)
tree: 87ce16bf7469b0327b3a68cfaf7f3ecc40d66345 /llvm
parent: 4563a06cee2e669a0e075ce98d0484adc7593dd4 (diff)
download: bcm5719-llvm-308a7eb0d23099e14bd03ed58358d6ddfe850315.tar.gz
bcm5719-llvm-308a7eb0d23099e14bd03ed58358d6ddfe850315.zip
Masked Store in Loop Vectorizer - bugfix
Fixed a bug in loop vectorization with conditional store.

Differential Revision: http://reviews.llvm.org/D19532

llvm-svn: 267597
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 22
-rw-r--r-- llvm/test/Transforms/LoopVectorize/X86/max-mstore.ll | 46
2 files changed, 55 insertions, 13 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4250bad3a84..e0cede73afc 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4909,24 +4909,20 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
if (!SI)
return false;
+ // Build a masked store if it is legal for the target.
+ if (isLegalMaskedStore(SI->getValueOperand()->getType(),
+ SI->getPointerOperand()) ||
+ isLegalMaskedScatter(SI->getValueOperand()->getType())) {
+ MaskedOp.insert(SI);
+ continue;
+ }
+
bool isSafePtr = (SafePtrs.count(SI->getPointerOperand()) != 0);
bool isSinglePredecessor = SI->getParent()->getSinglePredecessor();
if (++NumPredStores > NumberOfStoresToPredicate || !isSafePtr ||
- !isSinglePredecessor) {
- // Build a masked store if it is legal for the target, otherwise
- // scalarize the block.
- bool isLegalMaskedOp =
- isLegalMaskedStore(SI->getValueOperand()->getType(),
- SI->getPointerOperand()) ||
- isLegalMaskedScatter(SI->getValueOperand()->getType());
- if (isLegalMaskedOp) {
- --NumPredStores;
- MaskedOp.insert(SI);
- continue;
- }
+ !isSinglePredecessor)
return false;
- }
}
if (it->mayThrow())
return false;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/max-mstore.ll b/llvm/test/Transforms/LoopVectorize/X86/max-mstore.ll
new file mode 100644
index 00000000000..a9ac04d4560
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/max-mstore.ll
@@ -0,0 +1,46 @@
+; RUN: opt -basicaa -loop-vectorize -force-vector-interleave=1 -S -mcpu=core-avx2 < %s | FileCheck %s
+; Checks that the conditional store in @foo is vectorized as a masked store when targeting core-avx2.
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@b = common global [256 x i32] zeroinitializer, align 16
+@a = common global [256 x i32] zeroinitializer, align 16
+
+; unsigned int a[256], b[256];
+; void foo() {
+; for (i = 0; i < 256; i++) {
+; if (b[i] > a[i])
+; a[i] = b[i];
+; }
+; }
+
+; CHECK-LABEL: foo
+; CHECK: load <8 x i32>
+; CHECK: icmp ugt <8 x i32>
+; CHECK: masked.store
+
+define void @foo() {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+ %arrayidx = getelementptr inbounds [256 x i32], [256 x i32]* @b, i64 0, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [256 x i32], [256 x i32]* @a, i64 0, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx2, align 4
+ %cmp3 = icmp ugt i32 %0, %1
+ br i1 %cmp3, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ store i32 %0, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 256
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc
+ ret void
+}
OpenPOWER on IntegriCloud