5 files changed, 146 insertions, 114 deletions
diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll
new file mode 100644
index 00000000000..36440da2162
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -loop-unroll -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+; CHECK-LABEL: @f
+define i32 @f(i32 %a) #0 {
+; CHECK: call i32 @llvm.bitreverse.i32
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret i32 %or
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %b.07 = phi i32 [ 0, %entry ], [ %or, %for.body ]
+  %shr = lshr i32 %a, %i.08
+  %and = and i32 %shr, 1
+  %sub = sub nuw nsw i32 31, %i.08
+  %shl = shl i32 %and, %sub
+  %or = or i32 %shl, %b.07
+  %inc = add nuw nsw i32 %i.08, 1
+  %exitcond = icmp eq i32 %inc, 32
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !3
+}
+
+attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a8" "target-features"="+dsp,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"min_enum_size", i32 4}
+!2 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git b7441a0f42c43a8eea9e3e706be187252db747fa)"}
+!3 = distinct !{!3, !4}
+!4 = !{!"llvm.loop.unroll.full"}
diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg b/llvm/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg
new file mode 100644
index 00000000000..98c6700c209
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/ARM/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'ARM' in config.root.targets:
+    config.unsupported = True
+
diff --git a/llvm/test/Transforms/CodeGenPrepare/bitreverse-hang.ll b/llvm/test/Transforms/CodeGenPrepare/bitreverse-hang.ll
new file mode 100644
index 00000000000..c81dcc15cae
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/bitreverse-hang.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -loop-unroll -codegenprepare -S | FileCheck %s
+
+; This test is a worst-case scenario for bitreversal/byteswap detection.
+; After loop unrolling (the unrolled loop is unreadably large so it has been kept
+; rolled here), we have a binary tree of OR operands (as bitreversal detection
+; looks straight through shifts):
+;
+;  OR
+;  | \
+;  |  LSHR
+;  | /
+;  OR
+;  | \
+;  |  LSHR
+;  | /
+;  OR
+;
+; This results in exponential runtime. The loop here is 32 iterations which will
+; totally hang if we don't deal with this case cleverly.
+
+@b = common global i32 0, align 4
+
+; CHECK: define i32 @fn1
+define i32 @fn1() #0 {
+entry:
+  %b.promoted = load i32, i32* @b, align 4, !tbaa !2
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %or4 = phi i32 [ %b.promoted, %entry ], [ %or, %for.body ]
+  %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %shr = lshr i32 %or4, 1
+  %or = or i32 %shr, %or4
+  %inc = add nuw nsw i32 %i.03, 1
+  %exitcond = icmp eq i32 %inc, 32
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  store i32 %or, i32* @b, align 4, !tbaa !2
+  ret i32 undef
+}
+
+attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git eb70f4e9cc9a4dc3dd57b032fb858d56b4b64a0e)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/Transforms/InstCombine/bitreverse-hang.ll b/llvm/test/Transforms/InstCombine/bitreverse-hang.ll
new file mode 100644
index 00000000000..6823bd0ed65
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bitreverse-hang.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -loop-unroll -instcombine -S | FileCheck %s
+
+; This test is a worst-case scenario for bitreversal/byteswap detection.
+; After loop unrolling (the unrolled loop is unreadably large so it has been kept
+; rolled here), we have a binary tree of OR operands (as bitreversal detection
+; looks straight through shifts):
+;
+;  OR
+;  | \
+;  |  LSHR
+;  | /
+;  OR
+;  | \
+;  |  LSHR
+;  | /
+;  OR
+;
+; This results in exponential runtime. The loop here is 32 iterations which will
+; totally hang if we don't deal with this case cleverly.
+
+@b = common global i32 0, align 4
+
+; CHECK: define i32 @fn1
+define i32 @fn1() #0 {
+entry:
+  %b.promoted = load i32, i32* @b, align 4, !tbaa !2
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %or4 = phi i32 [ %b.promoted, %entry ], [ %or, %for.body ]
+  %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %shr = lshr i32 %or4, 1
+  %or = or i32 %shr, %or4
+  %inc = add nuw nsw i32 %i.03, 1
+  %exitcond = icmp eq i32 %inc, 32
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  store i32 %or, i32* @b, align 4, !tbaa !2
+  ret i32 undef
+}
+
+attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git eb70f4e9cc9a4dc3dd57b032fb858d56b4b64a0e)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/Transforms/InstCombine/bitreverse-recognize.ll b/llvm/test/Transforms/InstCombine/bitreverse-recognize.ll
deleted file mode 100644
index fbd5cb6d139..00000000000
--- a/llvm/test/Transforms/InstCombine/bitreverse-recognize.ll
+++ /dev/null
@@ -1,114 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.10.0"
-
-define zeroext i8 @f_u8(i8 zeroext %a) {
-; CHECK-LABEL: @f_u8
-; CHECK-NEXT: %[[A:.*]] = call i8 @llvm.bitreverse.i8(i8 %a)
-; CHECK-NEXT: ret i8 %[[A]]
-  %1 = shl i8 %a, 7
-  %2 = shl i8 %a, 5
-  %3 = and i8 %2, 64
-  %4 = shl i8 %a, 3
-  %5 = and i8 %4, 32
-  %6 = shl i8 %a, 1
-  %7 = and i8 %6, 16
-  %8 = lshr i8 %a, 1
-  %9 = and i8 %8, 8
-  %10 = lshr i8 %a, 3
-  %11 = and i8 %10, 4
-  %12 = lshr i8 %a, 5
-  %13 = and i8 %12, 2
-  %14 = lshr i8 %a, 7
-  %15 = or i8 %14, %1
-  %16 = or i8 %15, %3
-  %17 = or i8 %16, %5
-  %18 = or i8 %17, %7
-  %19 = or i8 %18, %9
-  %20 = or i8 %19, %11
-  %21 = or i8 %20, %13
-  ret i8 %21
-}
-
-; The ANDs with 32 and 64 have been swapped here, so the sequence does not
-; completely match a bitreverse.
-define zeroext i8 @f_u8_fail(i8 zeroext %a) {
-; CHECK-LABEL: @f_u8_fail
-; CHECK-NOT: call
-; CHECK: ret i8
-  %1 = shl i8 %a, 7
-  %2 = shl i8 %a, 5
-  %3 = and i8 %2, 32
-  %4 = shl i8 %a, 3
-  %5 = and i8 %4, 64
-  %6 = shl i8 %a, 1
-  %7 = and i8 %6, 16
-  %8 = lshr i8 %a, 1
-  %9 = and i8 %8, 8
-  %10 = lshr i8 %a, 3
-  %11 = and i8 %10, 4
-  %12 = lshr i8 %a, 5
-  %13 = and i8 %12, 2
-  %14 = lshr i8 %a, 7
-  %15 = or i8 %14, %1
-  %16 = or i8 %15, %3
-  %17 = or i8 %16, %5
-  %18 = or i8 %17, %7
-  %19 = or i8 %18, %9
-  %20 = or i8 %19, %11
-  %21 = or i8 %20, %13
-  ret i8 %21
-}
-
-define zeroext i16 @f_u16(i16 zeroext %a) {
-; CHECK-LABEL: @f_u16
-; CHECK-NEXT: %[[A:.*]] = call i16 @llvm.bitreverse.i16(i16 %a)
-; CHECK-NEXT: ret i16 %[[A]]
-  %1 = shl i16 %a, 15
-  %2 = shl i16 %a, 13
-  %3 = and i16 %2, 16384
-  %4 = shl i16 %a, 11
-  %5 = and i16 %4, 8192
-  %6 = shl i16 %a, 9
-  %7 = and i16 %6, 4096
-  %8 = shl i16 %a, 7
-  %9 = and i16 %8, 2048
-  %10 = shl i16 %a, 5
-  %11 = and i16 %10, 1024
-  %12 = shl i16 %a, 3
-  %13 = and i16 %12, 512
-  %14 = shl i16 %a, 1
-  %15 = and i16 %14, 256
-  %16 = lshr i16 %a, 1
-  %17 = and i16 %16, 128
-  %18 = lshr i16 %a, 3
-  %19 = and i16 %18, 64
-  %20 = lshr i16 %a, 5
-  %21 = and i16 %20, 32
-  %22 = lshr i16 %a, 7
-  %23 = and i16 %22, 16
-  %24 = lshr i16 %a, 9
-  %25 = and i16 %24, 8
-  %26 = lshr i16 %a, 11
-  %27 = and i16 %26, 4
-  %28 = lshr i16 %a, 13
-  %29 = and i16 %28, 2
-  %30 = lshr i16 %a, 15
-  %31 = or i16 %30, %1
-  %32 = or i16 %31, %3
-  %33 = or i16 %32, %5
-  %34 = or i16 %33, %7
-  %35 = or i16 %34, %9
-  %36 = or i16 %35, %11
-  %37 = or i16 %36, %13
-  %38 = or i16 %37, %15
-  %39 = or i16 %38, %17
-  %40 = or i16 %39, %19
-  %41 = or i16 %40, %21
-  %42 = or i16 %41, %23
-  %43 = or i16 %42, %25
-  %44 = or i16 %43, %27
-  %45 = or i16 %44, %29
-  ret i16 %45
-}
-\ No newline at end of file