diff options
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 3 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopVectorize/i8-induction.ll | 35 | 
2 files changed, 38 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 4bb8c436564..464ed97506f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1033,11 +1033,14 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {    // We may need to extend the index in case there is a type mismatch.    // We know that the count starts at zero and does not overflow. +  unsigned IdxTyBW = IdxTy->getScalarSizeInBits();    if (Count->getType() != IdxTy) {      // The exit count can be of pointer type. Convert it to the correct      // integer type.      if (ExitCount->getType()->isPointerTy())        Count = CastInst::CreatePointerCast(Count, IdxTy, "ptrcnt.to.int", Loc); +    else if (IdxTyBW < Count->getType()->getScalarSizeInBits()) +      Count = CastInst::CreateTruncOrBitCast(Count, IdxTy, "tr.cnt", Loc);      else        Count = CastInst::CreateZExtOrBitCast(Count, IdxTy, "zext.cnt", Loc);    } diff --git a/llvm/test/Transforms/LoopVectorize/i8-induction.ll b/llvm/test/Transforms/LoopVectorize/i8-induction.ll new file mode 100644 index 00000000000..7759b7085a1 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/i8-induction.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@a = common global i8 0, align 1 +@b = common global i8 0, align 1 + +define void @f() nounwind uwtable ssp { +scalar.ph: +  store i8 0, i8* inttoptr (i64 1 to i8*), align 1, !tbaa !0 +  %0 = load i8* @a, align 1, !tbaa !0 +  br label %for.body + +for.body: +  %mul16 = phi i8 [ 0, %scalar.ph ], [ %mul, %for.body ]              ; <------- i8 induction var. +  %c.015 = phi i8 [ undef, %scalar.ph ], [ %conv8, %for.body ] +  %conv2 = sext i8 %c.015 to i32 +  %tobool = icmp ne i8 %c.015, 0 +  %.sink = select i1 %tobool, i8 %c.015, i8 %0 +  %mul = mul i8 %mul16, %.sink +  %add = add nsw i32 %conv2, 1 +  %conv8 = trunc i32 %add to i8 +  %sext = shl i32 %add, 24 +  %phitmp14 = icmp slt i32 %sext, 268435456 +  br i1 %phitmp14, label %for.body, label %for.end + +for.end:                                          ; preds = %for.body +  store i8 %mul, i8* @b, align 1, !tbaa !0 +  ret void +} + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA"} +  | 

