Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Analysis/VectorUtils.cpp                                         18
-rw-r--r--  llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll  36
2 files changed, 50 insertions, 4 deletions
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 23a0de856bc..2c03f1a05ce 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -320,6 +320,9 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
   SmallPtrSet<Instruction *, 4> InstructionSet;
   MapVector<Instruction *, uint64_t> MinBWs;
 
+  assert(Blocks.size() > 0 && "Must have at least one block!");
+  const DataLayout &DL = Blocks[0]->getModule()->getDataLayout();
+
   // Determine the roots. We work bottom-up, from truncs or icmps.
   bool SeenExtFromIllegalType = false;
   for (auto *BB : Blocks)
@@ -363,12 +366,19 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
 
     // If we encounter a type that is larger than 64 bits, we can't represent
     // it so bail out.
-    if (DB.getDemandedBits(I).getBitWidth() > 64)
+    APInt NeededBits = DB.getDemandedBits(I);
+    unsigned BW = NeededBits.getBitWidth();
+    if (BW > 64)
       return MapVector<Instruction *, uint64_t>();
 
-    uint64_t V = DB.getDemandedBits(I).getZExtValue();
-    DBits[Leader] |= V;
-    DBits[I] = V;
+    auto NSB = ComputeNumSignBits(I, DL);
+
+    // Query demanded bits for the bits required by the instruction. Remove
+    // any bits that are equal to the sign bit, because we can truncate the
+    // instruction without changing their value.
+    NeededBits &= APInt::getLowBitsSet(BW, BW - NSB);
+    DBits[Leader] |= NeededBits.getZExtValue();
+    DBits[I] |= NeededBits.getZExtValue();
 
     // Casts, loads and instructions outside of our range terminate a chain
     // successfully.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
index c7ced757581..729592d6f81 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
@@ -263,5 +263,41 @@ for.body:                                         ; preds = %entry, %for.body
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
+; CHECK-LABEL: @add_g
+; CHECK: load <16 x i8>
+; CHECK: xor <16 x i8>
+; CHECK: icmp ult <16 x i8>
+; CHECK: select <16 x i1> {{.*}}, <16 x i8>
+; CHECK: store <16 x i8>
+define void @add_g(i8* noalias nocapture readonly %p, i8* noalias nocapture readonly %q, i8* noalias nocapture
+%r, i8 %arg1, i32 %len) #0 {
+  %1 = icmp sgt i32 %len, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0
+  %2 = sext i8 %arg1 to i64
+  br label %3
+
+._crit_edge:                                      ; preds = %3, %0
+  ret void
+
+; <label>:3                                       ; preds = %3, %.lr.ph
+  %indvars.iv = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next, %3 ]
+  %x4 = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+  %x5 = load i8, i8* %x4
+  %x7 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+  %x8 = load i8, i8* %x7
+  %x9 = zext i8 %x5 to i32
+  %x10 = xor i32 %x9, 255
+  %x11 = icmp ult i32 %x10, 24
+  %x12 = select i1 %x11, i32 %x10, i32 24
+  %x13 = trunc i32 %x12 to i8
+  store i8 %x13, i8* %x4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %len
+  br i1 %exitcond, label %._crit_edge, label %3
+}
+
 attributes #0 = { nounwind }
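For reference, a minimal standalone sketch of the narrowing arithmetic performed by the VectorUtils.cpp hunk, not part of the patch. Plain uint64_t masks stand in for llvm::APInt, and NSB is a hard-coded value standing in for ComputeNumSignBits(); the widths are assumptions chosen to mirror the i32 xor in the @add_g test, whose upper 24 bits are copies of the (zero) sign bit.

// Illustration only: uint64_t masks stand in for llvm::APInt, and NSB is a
// hard-coded value standing in for ComputeNumSignBits().
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned BW = 32;              // bit width of the instruction (i32)
  const unsigned NSB = 24;             // assumed known sign bits (bits 31..8)
  uint64_t NeededBits = 0xFFFFFFFFull; // assume every bit is demanded upstream

  // Keep only the low BW - NSB bits; the cleared high bits are copies of the
  // sign bit, so they can be recreated by extending after truncation.
  const uint64_t LowMask = ~0ull >> (64 - (BW - NSB));
  NeededBits &= LowMask;
  assert(NeededBits == 0xFF);

  // The highest demanded bit is now bit 7, so the chain fits in 8 bits,
  // matching the <16 x i8> CHECK lines of @add_g above.
  const unsigned MinWidth = 64 - __builtin_clzll(NeededBits);
  printf("minimum width = %u bits\n", MinWidth); // prints 8
  return 0;
}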

