diff options
-rw-r--r-- | llvm/lib/Analysis/VectorUtils.cpp | 38 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 7 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll | 2 |
3 files changed, 24 insertions, 23 deletions
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 4153c843c40..6f25687b635 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -438,21 +438,21 @@ llvm::Value *llvm::getSplatValue(Value *V) { return InsertEltInst->getOperand(1); } -DenseMap<Instruction*, uint64_t> llvm::computeMinimumValueSizes( - ArrayRef<BasicBlock*> Blocks, DemandedBits &DB, - const TargetTransformInfo *TTI) { +DenseMap<Instruction *, uint64_t> +llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB, + const TargetTransformInfo *TTI) { // DemandedBits will give us every value's live-out bits. But we want // to ensure no extra casts would need to be inserted, so every DAG // of connected values must have the same minimum bitwidth. - EquivalenceClasses<Value*> ECs; - SmallVector<Value*,16> Worklist; - SmallPtrSet<Value*,4> Roots; - SmallPtrSet<Value*,16> Visited; - DenseMap<Value*,uint64_t> DBits; - SmallPtrSet<Instruction*,4> InstructionSet; - DenseMap<Instruction*, uint64_t> MinBWs; - + EquivalenceClasses<Value *> ECs; + SmallVector<Value *, 16> Worklist; + SmallPtrSet<Value *, 4> Roots; + SmallPtrSet<Value *, 16> Visited; + DenseMap<Value *, uint64_t> DBits; + SmallPtrSet<Instruction *, 4> InstructionSet; + DenseMap<Instruction *, uint64_t> MinBWs; + // Determine the roots. We work bottom-up, from truncs or icmps. bool SeenExtFromIllegalType = false; for (auto *BB : Blocks) @@ -462,7 +462,7 @@ DenseMap<Instruction*, uint64_t> llvm::computeMinimumValueSizes( if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) && !TTI->isTypeLegal(I.getOperand(0)->getType())) SeenExtFromIllegalType = true; - + // Only deal with non-vector integers up to 64-bits wide. if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) && !I.getType()->isVectorTy() && @@ -471,7 +471,7 @@ DenseMap<Instruction*, uint64_t> llvm::computeMinimumValueSizes( // don't add it to the worklist. if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType())) continue; - + Worklist.push_back(&I); Roots.insert(&I); } @@ -479,12 +479,12 @@ DenseMap<Instruction*, uint64_t> llvm::computeMinimumValueSizes( // Early exit. if (Worklist.empty() || (TTI && !SeenExtFromIllegalType)) return MinBWs; - + // Now proceed breadth-first, unioning values together. while (!Worklist.empty()) { Value *Val = Worklist.pop_back_val(); Value *Leader = ECs.getOrInsertLeaderValue(Val); - + if (Visited.count(Val)) continue; Visited.insert(Val); @@ -497,11 +497,11 @@ DenseMap<Instruction*, uint64_t> llvm::computeMinimumValueSizes( // If we encounter a type that is larger than 64 bits, we can't represent // it so bail out. if (DB.getDemandedBits(I).getBitWidth() > 64) - return DenseMap<Instruction*,uint64_t>(); - + return DenseMap<Instruction *, uint64_t>(); + uint64_t V = DB.getDemandedBits(I).getZExtValue(); DBits[Leader] |= V; - + // Casts, loads and instructions outside of our range terminate a chain // successfully. if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) || @@ -540,7 +540,7 @@ DenseMap<Instruction*, uint64_t> llvm::computeMinimumValueSizes( for (auto *U : I.first->users()) if (U->getType()->isIntegerTy() && DBits.count(U) == 0) DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL; - + for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) { uint64_t LeaderDemandedBits = 0; for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1a6021cb3c8..d3101c69e01 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2100,7 +2100,6 @@ InnerLoopVectorizer::getVectorValue(Value *V) { // If this scalar is unknown, assume that it is a constant or that it is // loop invariant. Broadcast V and save the value for future uses. Value *B = getBroadcastInstrs(V); - return WidenMap.splat(V, B); } @@ -5409,8 +5408,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); - if (VF > 1 && MinBWs.count(dyn_cast<Instruction>(I->getOperand(0)))) - ValTy = IntegerType::get(ValTy->getContext(), MinBWs[I]); + Instruction *Op0AsInstruction = dyn_cast<Instruction>(I->getOperand(0)); + auto It = MinBWs.find(Op0AsInstruction); + if (VF > 1 && It != MinBWs.end()) + ValTy = IntegerType::get(ValTy->getContext(), It->second); VectorTy = ToVectorTy(ValTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy); } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll index f5b6a643c07..eee31049180 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll @@ -1,4 +1,4 @@ -; RUN: opt -S < %s -basicaa -loop-vectorize -simplifycfg -instsimplify -instcombine -licm -force-vector-interleave=1 2>&1 | FileCheck %s +; RUN: opt -S < %s -basicaa -loop-vectorize -force-vector-interleave=1 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64" |