summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/InstCombine/shift-add.ll
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2016-11-01 15:40:30 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2016-11-01 15:40:30 +0000
commit6dd8fab443451e1bbab87186057a80e3088d6d84 (patch)
tree9f6b1fce4190f8c95dfb4dc26d963275c828a93f /llvm/test/Transforms/InstCombine/shift-add.ll
parent60d6ef63a4e30bbba038d67849fa786314f670f6 (diff)
downloadbcm5719-llvm-6dd8fab443451e1bbab87186057a80e3088d6d84.tar.gz
bcm5719-llvm-6dd8fab443451e1bbab87186057a80e3088d6d84.zip
[InstCombine] Folding of shifts by the sum of positive values
This patch introduces the combine: (C1 shift (A add C2)) -> ((C1 shift C2) shift A) iff A and C2 are both positive. If both A and C2 are known to be positive then we can safely split into 2 shifts, permitting the folding of the inner shift. Fix for the spec benchmark case mentioned by @nadav on PR15141 (assuming we can prove that the inputs are positive). Differential Revision: https://reviews.llvm.org/D26000 llvm-svn: 285696
Diffstat (limited to 'llvm/test/Transforms/InstCombine/shift-add.ll')
-rw-r--r--llvm/test/Transforms/InstCombine/shift-add.ll74
1 files changed, 74 insertions, 0 deletions
diff --git a/llvm/test/Transforms/InstCombine/shift-add.ll b/llvm/test/Transforms/InstCombine/shift-add.ll
new file mode 100644
index 00000000000..497159f19b6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/shift-add.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; This test makes sure that the add feeding each shift amount is eliminated by folding its constant into the shifted constant.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @shl_C1_add_A_C2_i32(i16 %A) { ; scalar case: shl of constant 6 by (zext(%A) + 5)
+; CHECK-LABEL: @shl_C1_add_A_C2_i32(
+; CHECK-NEXT: [[B:%.*]] = zext i16 %A to i32
+; CHECK-NEXT: [[D:%.*]] = shl i32 192, [[B]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %B = zext i16 %A to i32 ; zero-extension from i16 leaves the sign bit clear, so %B is non-negative
+ %C = add i32 %B, 5 ; shift amount = non-negative value + non-negative constant
+ %D = shl i32 6, %C ; expected to become a shl of 192 (= 6 << 5) by %B, with the add gone
+ ret i32 %D
+}
+
+define i32 @ashr_C1_add_A_C2_i32(i32 %A) { ; scalar case: ashr of constant 6 by ((%A & 65535) + 5)
+; CHECK-LABEL: @ashr_C1_add_A_C2_i32(
+; CHECK-NEXT: ret i32 0
+;
+ %B = and i32 %A, 65535 ; mask to the low 16 bits, so %B is non-negative
+ %C = add i32 %B, 5 ; shift amount = non-negative value + non-negative constant
+ %D = ashr i32 6, %C ; 6 ashr 5 is 0, so the expected output is the constant 0
+ ret i32 %D
+}
+
+; lshr 6, (%B + 5) is expected to split into lshr (lshr 6, 5), %B; the inner
+; shift folds to 0, so the whole function folds to the constant 0.
+define i32 @lshr_C1_add_A_C2_i32(i32 %A) {
+; CHECK-LABEL: @lshr_C1_add_A_C2_i32(
+; CHECK-NEXT: ret i32 0
+;
+ %B = and i32 %A, 65535 ; mask to the low 16 bits, so %B is non-negative
+ %C = add i32 %B, 5 ; shift amount = non-negative value + non-negative constant
+ %D = lshr i32 6, %C ; logical shift right, matching the function name
+ ret i32 %D
+}
+
+define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) { ; vector case: per-lane shl of constants by (zext(%A) + constants)
+; CHECK-LABEL: @shl_C1_add_A_C2_v4i32(
+; CHECK-NEXT: [[B:%.*]] = zext <4 x i16> %A to <4 x i32>
+; CHECK-NEXT: [[D:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 undef, i32 -458752>, [[B]]
+; CHECK-NEXT: ret <4 x i32> [[D]]
+;
+ %B = zext <4 x i16> %A to <4 x i32> ; zero-extension leaves every lane's sign bit clear
+ %C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16> ; lane 2 adds 50, which is larger than the i32 bit width
+ %D = shl <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C ; expected lanes: 6<<0 = 6, 2<<1 = 4, undef for lane 2, -7<<16 = -458752
+ ret <4 x i32> %D
+}
+
+define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) { ; vector case: per-lane ashr of constants by (masked %A + constants)
+; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32(
+; CHECK-NEXT: [[B:%.*]] = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535>
+; CHECK-NEXT: [[D:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 undef, i32 -1>, [[B]]
+; CHECK-NEXT: ret <4 x i32> [[D]]
+;
+ %B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535> ; every mask has the sign bit clear, so each lane is non-negative
+ %C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16> ; lane 2 adds 50, which is larger than the i32 bit width
+ %D = ashr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C ; expected lanes: 6>>0 = 6, 2>>1 = 1, undef for lane 2, -7 ashr 16 = -1
+ ret <4 x i32> %D
+}
+
+define <4 x i32> @lshr_C1_add_A_C2_v4i32(<4 x i32> %A) { ; vector case: per-lane lshr of constants by (masked %A + constants)
+; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32(
+; CHECK-NEXT: [[B:%.*]] = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535>
+; CHECK-NEXT: [[D:%.*]] = lshr <4 x i32> <i32 6, i32 1, i32 undef, i32 65535>, [[B]]
+; CHECK-NEXT: ret <4 x i32> [[D]]
+;
+ %B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535> ; every mask has the sign bit clear, so each lane is non-negative
+ %C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16> ; lane 2 adds 50, which is larger than the i32 bit width
+ %D = lshr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C ; expected lanes: 6>>0 = 6, 2>>1 = 1, undef for lane 2, 0xFFFFFFF9 lshr 16 = 65535
+ ret <4 x i32> %D
+}
OpenPOWER on IntegriCloud