Handle non-constant shifts in computeKnownBits, and use computeKnownBits for constant folding in InstCombine/Simplify

First, the motivation: LLVM currently does not realize that: ((2072 >> (L == 0)) >> 7) & 1 == 0 where L is some arbitrary value. Whether you right-shift 2072 by 7 or by 8, the lowest-order bit is always zero. There are obviously several ways to go about fixing this, but the generic solution pursued in this patch is to teach computeKnownBits something about shifts by a non-constant amount. Previously, we would give up completely on these. Instead, in cases where we know something about the low-order bits of the shift-amount operand, we can combine (bitwise-AND together) the associated restrictions for all shift amounts consistent with that knowledge. As a further generalization, I refactored all of the logic for all three kinds of shifts to have this capability. This works well in the above case, for example, because the dynamic shift amount can only be 0 or 1, and thus we can say a lot about the known bits of the result. This brings us to the second part of this change: Even when we know all of the bits of a value via computeKnownBits, nothing used to constant-fold the result. This introduces the necessary code into InstCombine and InstSimplify. I've added it into both because: 1. InstCombine won't automatically pick up the associated logic in InstSimplify (InstCombine uses InstSimplify, but not via the API that passes in the original instruction). 2. Putting the logic in InstCombine allows the resulting simplifications to become part of the iterative worklist. 3. Putting the logic in InstSimplify allows the resulting simplifications to be used everywhere else that calls SimplifyInstruction (inlining, unrolling, and many others). And this requires a small change to our definition of an ephemeral value so that we don't break the test case from r246696 (where the icmp feeding the @llvm.assume is also feeding a br). 
Under the old definition, the icmp would not be considered ephemeral (because it is used by the br), but this causes the assume to remove itself (in addition to simplifying the branch structure), and it seems more useful to prevent that from happening. llvm-svn: 251146
author: Hal Finkel <hfinkel@anl.gov> 2015-10-23 20:37:08 +0000
committer: Hal Finkel <hfinkel@anl.gov> 2015-10-23 20:37:08 +0000
commit: f2199b21786b209325912ddcc51224305bb0bac1 (patch)
tree: f97e2567c22ec40313992e4ee5e1e81dc5f9f86e /llvm/test/Transforms
parent: d4f55c0b1b878df2aa28f0df382158cb69ed9e74 (diff)
download: bcm5719-llvm-f2199b21786b209325912ddcc51224305bb0bac1.tar.gz
bcm5719-llvm-f2199b21786b209325912ddcc51224305bb0bac1.zip
4 files changed, 72 insertions, 6 deletions
diff --git a/llvm/test/Transforms/InstCombine/all-bits-shift.ll b/llvm/test/Transforms/InstCombine/all-bits-shift.ll
new file mode 100644
index 00000000000..b9eb19cf2ad
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/all-bits-shift.ll
@@ -0,0 +1,46 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@d = global i32 15, align 4
+@b = global i32* @d, align 8
+@a = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define signext i32 @main() #1 {
+entry:
+  %0 = load i32*, i32** @b, align 8
+  %1 = load i32, i32* @a, align 4
+  %lnot = icmp eq i32 %1, 0
+  %lnot.ext = zext i1 %lnot to i32
+  %shr.i = lshr i32 2072, %lnot.ext
+  %call.lobit = lshr i32 %shr.i, 7
+  %2 = and i32 %call.lobit, 1
+  %3 = load i32, i32* %0, align 4
+  %or = or i32 %2, %3
+  store i32 %or, i32* %0, align 4
+  %4 = load i32, i32* @a, align 4
+  %lnot.1 = icmp eq i32 %4, 0
+  %lnot.ext.1 = zext i1 %lnot.1 to i32
+  %shr.i.1 = lshr i32 2072, %lnot.ext.1
+  %call.lobit.1 = lshr i32 %shr.i.1, 7
+  %5 = and i32 %call.lobit.1, 1
+  %or.1 = or i32 %5, %or
+  store i32 %or.1, i32* %0, align 4
+  ret i32 %or.1
+
+; Check that both InstCombine and InstSimplify can use computeKnownBits to
+; realize that:
+;   ((2072 >> (L == 0)) >> 7) & 1
+; is always zero.
+
+; CHECK-LABEL: @main
+; CHECK: %[[V1:[0-9]+]] = load i32*, i32** @b, align 8
+; CHECK: %[[V2:[0-9]+]] = load i32, i32* %[[V1]], align 4
+; CHECK: ret i32 %[[V2]]
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
diff --git a/llvm/test/Transforms/InstCombine/div.ll b/llvm/test/Transforms/InstCombine/div.ll
index 3194c015fd7..27a316113e5 100644
--- a/llvm/test/Transforms/InstCombine/div.ll
+++ b/llvm/test/Transforms/InstCombine/div.ll
@@ -270,9 +270,7 @@ define <2 x i32> @test31(<2 x i32> %x) {
   %div = udiv <2 x i32> %shr, <i32 2147483647, i32 2147483647>
   ret <2 x i32> %div
 ; CHECK-LABEL: @test31(
-; CHECK-NEXT: %[[shr:.*]] = lshr <2 x i32> %x, <i32 31, i32 31>
-; CHECK-NEXT: udiv <2 x i32> %[[shr]], <i32 2147483647, i32 2147483647>
-; CHECK-NEXT: ret <2 x i32>
+; CHECK-NEXT: ret <2 x i32> zeroinitializer
 }
 
 define i32 @test32(i32 %a, i32 %b) {
diff --git a/llvm/test/Transforms/InstCombine/load-combine-metadata.ll b/llvm/test/Transforms/InstCombine/load-combine-metadata.ll
index 9b9c1fe607b..24b26fa4213 100644
--- a/llvm/test/Transforms/InstCombine/load-combine-metadata.ll
+++ b/llvm/test/Transforms/InstCombine/load-combine-metadata.ll
@@ -17,9 +17,9 @@ define void @test_load_load_combine_metadata(i32*, i32*, i32*) {
   ret void
 }
 
-; CHECK: ![[RANGE]] = !{i32 0, i32 1, i32 8, i32 9}
-!0 = !{ i32 0, i32 1 }
-!1 = !{ i32 8, i32 9 }
+; CHECK: ![[RANGE]] = !{i32 0, i32 5, i32 7, i32 9}
+!0 = !{ i32 0, i32 5 }
+!1 = !{ i32 7, i32 9 }
 !2 = !{!2}
 !3 = !{!3, !2}
 !4 = !{!4, !2}
diff --git a/llvm/test/Transforms/InstSimplify/shift-128-kb.ll b/llvm/test/Transforms/InstSimplify/shift-128-kb.ll
new file mode 100644
index 00000000000..3f69ecccaf5
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/shift-128-kb.ll
@@ -0,0 +1,22 @@
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define zeroext i1 @_Z10isNegativemj(i64 %Val, i32 zeroext %IntegerBitWidth) {
+entry:
+  %conv = zext i32 %IntegerBitWidth to i64
+  %sub = sub i64 128, %conv
+  %conv1 = trunc i64 %sub to i32
+  %conv2 = zext i64 %Val to i128
+  %sh_prom = zext i32 %conv1 to i128
+  %shl = shl i128 %conv2, %sh_prom
+  %shr = ashr i128 %shl, %sh_prom
+  %cmp = icmp slt i128 %shr, 0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @_Z10isNegativemj
+; CHECK-NOT: ret i1 false
+; CHECK: ret i1 %cmp
+
author	Hal Finkel <hfinkel@anl.gov>	2015-10-23 20:37:08 +0000
committer	Hal Finkel <hfinkel@anl.gov>	2015-10-23 20:37:08 +0000
commit	f2199b21786b209325912ddcc51224305bb0bac1 (patch)
tree	f97e2567c22ec40313992e4ee5e1e81dc5f9f86e /llvm/test/Transforms
parent	d4f55c0b1b878df2aa28f0df382158cb69ed9e74 (diff)
download	bcm5719-llvm-f2199b21786b209325912ddcc51224305bb0bac1.tar.gz bcm5719-llvm-f2199b21786b209325912ddcc51224305bb0bac1.zip