summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Analysis/VectorUtils.cpp | 18
-rw-r--r-- llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll | 36
2 files changed, 50 insertions, 4 deletions
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 23a0de856bc..2c03f1a05ce 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -320,6 +320,9 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
SmallPtrSet<Instruction *, 4> InstructionSet;
MapVector<Instruction *, uint64_t> MinBWs;
+ assert(Blocks.size() > 0 && "Must have at least one block!");
+ const DataLayout &DL = Blocks[0]->getModule()->getDataLayout();
+
// Determine the roots. We work bottom-up, from truncs or icmps.
bool SeenExtFromIllegalType = false;
for (auto *BB : Blocks)
@@ -363,12 +366,19 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
// If we encounter a type that is larger than 64 bits, we can't represent
// it so bail out.
- if (DB.getDemandedBits(I).getBitWidth() > 64)
+ APInt NeededBits = DB.getDemandedBits(I);
+ unsigned BW = NeededBits.getBitWidth();
+ if (BW > 64)
return MapVector<Instruction *, uint64_t>();
- uint64_t V = DB.getDemandedBits(I).getZExtValue();
- DBits[Leader] |= V;
- DBits[I] = V;
+ auto NSB = ComputeNumSignBits(I, DL);
+
+ // Query demanded bits for the bits required by the instruction. Remove
+ // any bits that are equal to the sign bit, because we can truncate the
+ // instruction without changing their value.
+ NeededBits &= APInt::getLowBitsSet(BW, BW - NSB);
+ DBits[Leader] |= NeededBits.getZExtValue();
+ DBits[I] |= NeededBits.getZExtValue();
// Casts, loads and instructions outside of our range terminate a chain
// successfully.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
index c7ced757581..729592d6f81 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
@@ -263,5 +263,41 @@ for.body: ; preds = %entry, %for.body
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
+; CHECK-LABEL: @add_g
+; CHECK: load <16 x i8>
+; CHECK: xor <16 x i8>
+; CHECK: icmp ult <16 x i8>
+; CHECK: select <16 x i1> {{.*}}, <16 x i8>
+; CHECK: store <16 x i8>
+define void @add_g(i8* noalias nocapture readonly %p, i8* noalias nocapture readonly %q, i8* noalias nocapture
+%r, i8 %arg1, i32 %len) #0 {
+ %1 = icmp sgt i32 %len, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0
+ %2 = sext i8 %arg1 to i64
+ br label %3
+
+._crit_edge: ; preds = %3, %0
+ ret void
+
+; <label>:3 ; preds = %3, %.lr.ph
+ %indvars.iv = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next, %3 ]
+ %x4 = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+ %x5 = load i8, i8* %x4
+ %x7 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+ %x8 = load i8, i8* %x7
+ %x9 = zext i8 %x5 to i32
+ %x10 = xor i32 %x9, 255
+ %x11 = icmp ult i32 %x10, 24
+ %x12 = select i1 %x11, i32 %x10, i32 24
+ %x13 = trunc i32 %x12 to i8
+ store i8 %x13, i8* %x4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %._crit_edge, label %3
+}
+
attributes #0 = { nounwind }
OpenPOWER on IntegriCloud