diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-11-01 15:40:30 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-11-01 15:40:30 +0000 |
commit | 6dd8fab443451e1bbab87186057a80e3088d6d84 (patch) | |
tree | 9f6b1fce4190f8c95dfb4dc26d963275c828a93f | |
parent | 60d6ef63a4e30bbba038d67849fa786314f670f6 (diff) | |
download | bcm5719-llvm-6dd8fab443451e1bbab87186057a80e3088d6d84.tar.gz bcm5719-llvm-6dd8fab443451e1bbab87186057a80e3088d6d84.zip |
[InstCombine] Folding of shifts by the sum of positive values
This patch introduces the combine:
(C1 shift (A add C2)) -> ((C1 shift C2) shift A)
iff A and C2 are both positive
If both A and C2 are known to be positive then we can safely split into 2 shifts, permitting the folding of the inner shift.
Fix for the spec benchmark case mentioned by @nadav on PR15141 (assuming we can prove that the inputs are positive).
Differential Revision: https://reviews.llvm.org/D26000
llvm-svn: 285696
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp | 11 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/shift-add.ll | 74 |
2 files changed, 84 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 341692fa3f4..5181d233d06 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -39,10 +39,19 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) return Res; + // (C1 shift (A add C2)) -> (C1 shift C2) shift A) + // iff A and C2 are both positive. + Value *A; + Constant *C; + if (match(Op0, m_Constant()) && match(Op1, m_Add(m_Value(A), m_Constant(C)))) + if (isKnownNonNegative(A, DL) && isKnownNonNegative(C, DL)) + return BinaryOperator::Create( + I.getOpcode(), Builder->CreateBinOp(I.getOpcode(), Op0, C), A); + // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2. // Because shifts by negative values (which could occur if A were negative) // are undefined. - Value *A; const APInt *B; + const APInt *B; if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) { // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't // demand the sign bit (and many others) here?? diff --git a/llvm/test/Transforms/InstCombine/shift-add.ll b/llvm/test/Transforms/InstCombine/shift-add.ll new file mode 100644 index 00000000000..497159f19b6 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/shift-add.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; This test makes sure that these instructions are properly eliminated. 
+; +; RUN: opt < %s -instcombine -S | FileCheck %s + +define i32 @shl_C1_add_A_C2_i32(i16 %A) { +; CHECK-LABEL: @shl_C1_add_A_C2_i32( +; CHECK-NEXT: [[B:%.*]] = zext i16 %A to i32 +; CHECK-NEXT: [[D:%.*]] = shl i32 192, [[B]] +; CHECK-NEXT: ret i32 [[D]] +; + %B = zext i16 %A to i32 + %C = add i32 %B, 5 + %D = shl i32 6, %C + ret i32 %D +} + +define i32 @ashr_C1_add_A_C2_i32(i32 %A) { +; CHECK-LABEL: @ashr_C1_add_A_C2_i32( +; CHECK-NEXT: ret i32 0 +; + %B = and i32 %A, 65535 + %C = add i32 %B, 5 + %D = ashr i32 6, %C + ret i32 %D +} + +define i32 @lshr_C1_add_A_C2_i32(i32 %A) { +; CHECK-LABEL: @lshr_C1_add_A_C2_i32( +; CHECK-NEXT: [[B:%.*]] = and i32 %A, 65535 +; CHECK-NEXT: [[D:%.*]] = shl i32 192, [[B]] +; CHECK-NEXT: ret i32 [[D]] +; + %B = and i32 %A, 65535 + %C = add i32 %B, 5 + %D = shl i32 6, %C + ret i32 %D +} + +define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) { +; CHECK-LABEL: @shl_C1_add_A_C2_v4i32( +; CHECK-NEXT: [[B:%.*]] = zext <4 x i16> %A to <4 x i32> +; CHECK-NEXT: [[D:%.*]] = shl <4 x i32> <i32 6, i32 4, i32 undef, i32 -458752>, [[B]] +; CHECK-NEXT: ret <4 x i32> [[D]] +; + %B = zext <4 x i16> %A to <4 x i32> + %C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16> + %D = shl <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C + ret <4 x i32> %D +} + +define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) { +; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32( +; CHECK-NEXT: [[B:%.*]] = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535> +; CHECK-NEXT: [[D:%.*]] = ashr <4 x i32> <i32 6, i32 1, i32 undef, i32 -1>, [[B]] +; CHECK-NEXT: ret <4 x i32> [[D]] +; + %B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535> + %C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16> + %D = ashr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C + ret <4 x i32> %D +} + +define <4 x i32> @lshr_C1_add_A_C2_v4i32(<4 x i32> %A) { +; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32( +; CHECK-NEXT: [[B:%.*]] = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535> +; CHECK-NEXT: [[D:%.*]] = lshr <4 x 
i32> <i32 6, i32 1, i32 undef, i32 65535>, [[B]] +; CHECK-NEXT: ret <4 x i32> [[D]] +; + %B = and <4 x i32> %A, <i32 0, i32 15, i32 255, i32 65535> + %C = add <4 x i32> %B, <i32 0, i32 1, i32 50, i32 16> + %D = lshr <4 x i32> <i32 6, i32 2, i32 1, i32 -7>, %C + ret <4 x i32> %D +} |