summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/IR/IRBuilder.cpp | 2
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 73
2 files changed, 39 insertions, 36 deletions
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index b7fa07c6ffa..89338c8b849 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -356,6 +356,7 @@ CallInst *IRBuilderBase::CreateMaskedLoad(Value *Ptr, unsigned Align,
PointerType *PtrTy = cast<PointerType>(Ptr->getType());
Type *DataTy = PtrTy->getElementType();
assert(DataTy->isVectorTy() && "Ptr should point to a vector");
+ assert(Mask && "Mask should not be all-ones (null)");
if (!PassThru)
PassThru = UndefValue::get(DataTy);
Type *OverloadedTypes[] = { DataTy, PtrTy };
@@ -375,6 +376,7 @@ CallInst *IRBuilderBase::CreateMaskedStore(Value *Val, Value *Ptr,
PointerType *PtrTy = cast<PointerType>(Ptr->getType());
Type *DataTy = PtrTy->getElementType();
assert(DataTy->isVectorTy() && "Ptr should point to a vector");
+ assert(Mask && "Mask should not be all-ones (null)");
Type *OverloadedTypes[] = { DataTy, PtrTy };
Value *Ops[] = { Val, Ptr, getInt32(Align), Mask };
return CreateMaskedIntrinsic(Intrinsic::masked_store, Ops, OverloadedTypes);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 012b10c8a9b..ab2eaf586b8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3045,13 +3045,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
PartPtr =
Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
- Mask[Part] = reverseVector(Mask[Part]);
+ if (Mask[Part]) // The reverse of a null all-one mask is a null mask.
+ Mask[Part] = reverseVector(Mask[Part]);
}
Value *VecPtr =
Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
- if (Legal->isMaskRequired(SI))
+ if (Legal->isMaskRequired(SI) && Mask[Part])
NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
Mask[Part]);
else
@@ -3083,12 +3084,13 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// wide load needs to start at the last vector element.
PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
- Mask[Part] = reverseVector(Mask[Part]);
+ if (Mask[Part]) // The reverse of a null all-one mask is a null mask.
+ Mask[Part] = reverseVector(Mask[Part]);
}
Value *VecPtr =
Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
- if (Legal->isMaskRequired(LI))
+ if (Legal->isMaskRequired(LI) && Mask[Part])
NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment, Mask[Part],
UndefValue::get(DataTy),
"wide.masked.load");
@@ -3136,10 +3138,10 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
Value *Cmp = nullptr;
if (IfPredicateInstr) {
Cmp = Cond[Part];
- if (Cmp->getType()->isVectorTy())
+ if (!Cmp) // Block in mask is all-one.
+ Cmp = Builder.getTrue();
+ else if (Cmp->getType()->isVectorTy())
Cmp = Builder.CreateExtractElement(Cmp, Builder.getInt32(Lane));
- Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp,
- ConstantInt::get(Cmp->getType(), 1));
}
Instruction *Cloned = Instr->clone();
@@ -4518,24 +4520,22 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator());
assert(BI && "Unexpected terminator found");
- if (BI->isConditional()) {
+ if (!BI->isConditional())
+ return EdgeMaskCache[Edge] = SrcMask;
- VectorParts EdgeMask(UF);
- for (unsigned Part = 0; Part < UF; ++Part) {
- auto *EdgeMaskPart = getOrCreateVectorValue(BI->getCondition(), Part);
- if (BI->getSuccessor(0) != Dst)
- EdgeMaskPart = Builder.CreateNot(EdgeMaskPart);
+ VectorParts EdgeMask(UF);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ auto *EdgeMaskPart = getOrCreateVectorValue(BI->getCondition(), Part);
+ if (BI->getSuccessor(0) != Dst)
+ EdgeMaskPart = Builder.CreateNot(EdgeMaskPart);
+ if (SrcMask[Part]) // Otherwise block in-mask is all-one, no need to AND.
EdgeMaskPart = Builder.CreateAnd(EdgeMaskPart, SrcMask[Part]);
- EdgeMask[Part] = EdgeMaskPart;
- }
- EdgeMaskCache[Edge] = EdgeMask;
- return EdgeMask;
+ EdgeMask[Part] = EdgeMaskPart;
}
- EdgeMaskCache[Edge] = SrcMask;
- return SrcMask;
+ return EdgeMaskCache[Edge] = EdgeMask;
}
InnerLoopVectorizer::VectorParts
@@ -4547,31 +4547,32 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
if (BCEntryIt != BlockMaskCache.end())
return BCEntryIt->second;
+ // All-one mask is modelled as no-mask following the convention for masked
+ // load/store/gather/scatter. Initialize BlockMask to no-mask.
VectorParts BlockMask(UF);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ BlockMask[Part] = nullptr;
// Loop incoming mask is all-one.
- if (OrigLoop->getHeader() == BB) {
- Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1);
- for (unsigned Part = 0; Part < UF; ++Part)
- BlockMask[Part] = getOrCreateVectorValue(C, Part);
- BlockMaskCache[BB] = BlockMask;
- return BlockMask;
- }
+ if (OrigLoop->getHeader() == BB)
+ return BlockMaskCache[BB] = BlockMask;
- // This is the block mask. We OR all incoming edges, and with zero.
- Value *Zero = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 0);
- for (unsigned Part = 0; Part < UF; ++Part)
- BlockMask[Part] = getOrCreateVectorValue(Zero, Part);
+ // This is the block mask. We OR all incoming edges.
+ for (auto *Predecessor : predecessors(BB)) {
+ VectorParts EdgeMask = createEdgeMask(Predecessor, BB);
+ if (!EdgeMask[0]) // Mask of predecessor is all-one so mask of block is too.
+ return BlockMaskCache[BB] = EdgeMask;
+
+ if (!BlockMask[0]) { // BlockMask has its initialized nullptr value.
+ BlockMask = EdgeMask;
+ continue;
+ }
- // For each pred:
- for (pred_iterator It = pred_begin(BB), E = pred_end(BB); It != E; ++It) {
- VectorParts EM = createEdgeMask(*It, BB);
for (unsigned Part = 0; Part < UF; ++Part)
- BlockMask[Part] = Builder.CreateOr(BlockMask[Part], EM[Part]);
+ BlockMask[Part] = Builder.CreateOr(BlockMask[Part], EdgeMask[Part]);
}
- BlockMaskCache[BB] = BlockMask;
- return BlockMask;
+ return BlockMaskCache[BB] = BlockMask;
}
void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
OpenPOWER on IntegriCloud