diff options
author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2016-04-26 20:18:04 +0000 |
---|---|---|
committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2016-04-26 20:18:04 +0000 |
commit | 308a7eb0d23099e14bd03ed58358d6ddfe850315 (patch) | |
tree | 87ce16bf7469b0327b3a68cfaf7f3ecc40d66345 /llvm | |
parent | 4563a06cee2e669a0e075ce98d0484adc7593dd4 (diff) | |
download | bcm5719-llvm-308a7eb0d23099e14bd03ed58358d6ddfe850315.tar.gz bcm5719-llvm-308a7eb0d23099e14bd03ed58358d6ddfe850315.zip |
Masked Store in Loop Vectorizer - bugfix
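Fixed a bug in loop vectorization with a conditional store: when a masked store (or masked scatter) is legal for the target, the store is now marked as a masked operation up front, instead of only after it had failed the scalar-predication checks (predicated-store limit, safe pointer, single predecessor).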
Differential Revision: http://reviews.llvm.org/D19532
llvm-svn: 267597
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 22 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/X86/max-mstore.ll | 46 |
2 files changed, 55 insertions, 13 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 4250bad3a84..e0cede73afc 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4909,24 +4909,20 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB, if (!SI) return false; + // Build a masked store if it is legal for the target. + if (isLegalMaskedStore(SI->getValueOperand()->getType(), + SI->getPointerOperand()) || + isLegalMaskedScatter(SI->getValueOperand()->getType())) { + MaskedOp.insert(SI); + continue; + } + bool isSafePtr = (SafePtrs.count(SI->getPointerOperand()) != 0); bool isSinglePredecessor = SI->getParent()->getSinglePredecessor(); if (++NumPredStores > NumberOfStoresToPredicate || !isSafePtr || - !isSinglePredecessor) { - // Build a masked store if it is legal for the target, otherwise - // scalarize the block. - bool isLegalMaskedOp = - isLegalMaskedStore(SI->getValueOperand()->getType(), - SI->getPointerOperand()) || - isLegalMaskedScatter(SI->getValueOperand()->getType()); - if (isLegalMaskedOp) { - --NumPredStores; - MaskedOp.insert(SI); - continue; - } + !isSinglePredecessor) return false; - } } if (it->mayThrow()) return false; diff --git a/llvm/test/Transforms/LoopVectorize/X86/max-mstore.ll b/llvm/test/Transforms/LoopVectorize/X86/max-mstore.ll new file mode 100644 index 00000000000..a9ac04d4560 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/max-mstore.ll @@ -0,0 +1,46 @@ +; RUN: opt -basicaa -loop-vectorize -force-vector-interleave=1 -S -mcpu=core-avx2 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@b = common global [256 x i32] zeroinitializer, align 16 +@a = common global [256 x i32] zeroinitializer, align 16 + +; unsigned int a[256], b[256]; +; void foo() { +; for (i = 0; i < 256; i++) { +; if (b[i] > a[i]) +; a[i] = b[i]; +; } +; } + +; CHECK-LABEL: foo +; CHECK: 
load <8 x i32> +; CHECK: icmp ugt <8 x i32> +; CHECK: masked.store + +define void @foo() { +entry: + br label %for.body + +for.body: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] + %arrayidx = getelementptr inbounds [256 x i32], [256 x i32]* @b, i64 0, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds [256 x i32], [256 x i32]* @a, i64 0, i64 %indvars.iv + %1 = load i32, i32* %arrayidx2, align 4 + %cmp3 = icmp ugt i32 %0, %1 + br i1 %cmp3, label %if.then, label %for.inc + +if.then: ; preds = %for.body + store i32 %0, i32* %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 256 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc + ret void +} |