| author | Nikita Popov <nikita.ppv@gmail.com> | 2018-11-24 19:00:45 +0000 |
|---|---|---|
| committer | Nikita Popov <nikita.ppv@gmail.com> | 2018-11-24 19:00:45 +0000 |
| commit | 2c779c0e342b310ead9461d5eac0b58886241c75 | |
| tree | 5b5d26900984e7ba4633e3291d23e4c59420ab63 /llvm | |
| parent | 42720603c44a398b0ffeeecf8d15dc9adf808e17 | |
[InstCombine] Determine demanded and known bits for funnel shifts
Support funnel shifts in InstCombine demanded bits simplification.
If the shift amount is constant, we can determine both the demanded
bits of the operands and the known bits of the result.
If one of the operands has no demanded bits, it will be replaced
by undef and the funnel shift will be simplified into a simple shift
due to the simplifications added in D54778.
Differential Revision: https://reviews.llvm.org/D54869
llvm-svn: 347515
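
As a rough illustration of the bit propagation described above (not LLVM code: plain unsigned integers stand in for APInt, and the 8-bit width, shift amount, and known-bits values are made up for the example), the following standalone C++ sketch mirrors the two directions the patch computes. For a funnel shift left by a constant S on width W, the result is `(X << S) | (Y >> (W - S))`, so demanded bits of the result map to `Demanded >> S` on the first operand and `Demanded << (W - S)` on the second, and known bits of the operands shift into the result the same way.

```cpp
// Minimal sketch of the fshl demanded/known-bits rules, assuming an
// 8-bit width. Ordinary unsigned arithmetic plus a width mask stands in
// for APInt; the concrete values below are invented for the example.
#include <cassert>
#include <cstdio>

int main() {
  const unsigned W = 8;                 // illustrative bit width
  const unsigned Mask = (1u << W) - 1;  // keep values to W bits, like APInt
  const unsigned S = 3;                 // constant shift amount, already mod W
  assert(S > 0 && S < W);               // plain C++ shifts by W are UB; APInt's are defined

  // For this width, fshl(X, Y, S) == ((X << S) | (Y >> (W - S))) & Mask.

  // 1. Demanded bits of the result, translated back to the operands.
  unsigned Demanded = 0xC3;                               // bits a user of the fshl needs
  unsigned DemandedLHS = Demanded >> S;                   // -> 0x18: bits of X that matter
  unsigned DemandedRHS = (Demanded << (W - S)) & Mask;    // -> 0x60: bits of Y that matter
  printf("demanded on X: 0x%02x, on Y: 0x%02x\n", DemandedLHS, DemandedRHS);

  // 2. Known bits of the operands, pushed forward into the result.
  unsigned XKnownZero = 0xF0, XKnownOne = 0x01;           // X: high nibble clear, low bit set
  unsigned YKnownZero = 0xE0, YKnownOne = 0x00;           // Y: top three bits clear
  unsigned KnownZero = ((XKnownZero << S) & Mask) | (YKnownZero >> (W - S)); // 0x87
  unsigned KnownOne  = ((XKnownOne  << S) & Mask) | (YKnownOne  >> (W - S)); // 0x08
  printf("result known zero: 0x%02x, known one: 0x%02x\n", KnownZero, KnownOne);
  return 0;
}
```

When `DemandedLHS` or `DemandedRHS` comes out as zero, the corresponding operand is irrelevant; that is the case where the demanded-bits pass replaces the operand with undef and the D54778 folds turn the funnel shift into a plain shl or lshr.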
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 24 |
| -rw-r--r-- | llvm/test/Transforms/InstCombine/fsh.ll | 53 |
2 files changed, 58 insertions, 19 deletions
```diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 45cacc73d63..a193dde1c39 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -690,6 +690,30 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         // TODO: Could compute known zero/one bits based on the input.
         break;
       }
+      case Intrinsic::fshr:
+      case Intrinsic::fshl: {
+        const APInt *SA;
+        if (!match(I->getOperand(2), m_APInt(SA)))
+          break;
+
+        // Normalize to funnel shift left. APInt shifts of BitWidth are well-
+        // defined, so no need to special-case zero shifts here.
+        uint64_t ShiftAmt = SA->urem(BitWidth);
+        if (II->getIntrinsicID() == Intrinsic::fshr)
+          ShiftAmt = BitWidth - ShiftAmt;
+
+        APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt));
+        APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt));
+        if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Depth + 1) ||
+            SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1))
+          return I;
+
+        Known.Zero = LHSKnown.Zero.shl(ShiftAmt) |
+                     RHSKnown.Zero.lshr(BitWidth - ShiftAmt);
+        Known.One = LHSKnown.One.shl(ShiftAmt) |
+                    RHSKnown.One.lshr(BitWidth - ShiftAmt);
+        break;
+      }
       case Intrinsic::x86_mmx_pmovmskb:
       case Intrinsic::x86_sse_movmsk_ps:
       case Intrinsic::x86_sse2_movmsk_pd:
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index db9b92e7076..a9fb826cddc 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -255,7 +255,7 @@ define <2 x i32> @fshr_op1_zero_vec(<2 x i32> %x) {
 
 define i32 @fshl_only_op0_demanded(i32 %x, i32 %y) {
 ; CHECK-LABEL: @fshl_only_op0_demanded(
-; CHECK-NEXT:    [[Z:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 7)
+; CHECK-NEXT:    [[Z:%.*]] = shl i32 [[X:%.*]], 7
 ; CHECK-NEXT:    [[R:%.*]] = and i32 [[Z]], 128
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
@@ -266,7 +266,7 @@ define i32 @fshl_only_op0_demanded(i32 %x, i32 %y) {
 
 define i32 @fshl_only_op1_demanded(i32 %x, i32 %y) {
 ; CHECK-LABEL: @fshl_only_op1_demanded(
-; CHECK-NEXT:    [[Z:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 7)
+; CHECK-NEXT:    [[Z:%.*]] = lshr i32 [[Y:%.*]], 25
 ; CHECK-NEXT:    [[R:%.*]] = and i32 [[Z]], 63
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
@@ -275,9 +275,9 @@ define i32 @fshl_only_op1_demanded(i32 %x, i32 %y) {
   ret i32 %r
 }
 
-define i33 @fshr_only_op0_demanded(i33 %x, i33 %y) {
-; CHECK-LABEL: @fshr_only_op0_demanded(
-; CHECK-NEXT:    [[Z:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 7)
+define i33 @fshr_only_op1_demanded(i33 %x, i33 %y) {
+; CHECK-LABEL: @fshr_only_op1_demanded(
+; CHECK-NEXT:    [[Z:%.*]] = lshr i33 [[Y:%.*]], 7
 ; CHECK-NEXT:    [[R:%.*]] = and i33 [[Z]], 12392
 ; CHECK-NEXT:    ret i33 [[R]]
 ;
@@ -286,10 +286,10 @@ define i33 @fshr_only_op0_demanded(i33 %x, i33 %y) {
   ret i33 %r
 }
 
-define i33 @fshr_only_op1_demanded(i33 %x, i33 %y) {
-; CHECK-LABEL: @fshr_only_op1_demanded(
-; CHECK-NEXT:    [[Z:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 7)
-; CHECK-NEXT:    [[R:%.*]] = lshr i33 [[Z]], 30
+define i33 @fshr_only_op0_demanded(i33 %x, i33 %y) {
+; CHECK-LABEL: @fshr_only_op0_demanded(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i33 [[X:%.*]], 4
+; CHECK-NEXT:    [[R:%.*]] = and i33 [[TMP1]], 7
 ; CHECK-NEXT:    ret i33 [[R]]
 ;
   %z = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 7)
@@ -297,6 +297,29 @@ define i33 @fshr_only_op1_demanded(i33 %x, i33 %y) {
   ret i33 %r
 }
 
+define <2 x i31> @fshl_only_op1_demanded_vec_splat(<2 x i31> %x, <2 x i31> %y) {
+; CHECK-LABEL: @fshl_only_op1_demanded_vec_splat(
+; CHECK-NEXT:    [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], <i31 24, i31 24>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i31> [[Z]], <i31 63, i31 31>
+; CHECK-NEXT:    ret <2 x i31> [[R]]
+;
+  %z = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 7, i31 7>)
+  %r = and <2 x i31> %z, <i31 63, i31 31>
+  ret <2 x i31> %r
+}
+
+; The shift modulo bitwidth is the same for all vector elements, but this is not simplified yet.
+define <2 x i31> @fshl_only_op1_demanded_vec_nonsplat(<2 x i31> %x, <2 x i31> %y) {
+; CHECK-LABEL: @fshl_only_op1_demanded_vec_nonsplat(
+; CHECK-NEXT:    [[Z:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> <i31 7, i31 38>)
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i31> [[Z]], <i31 63, i31 31>
+; CHECK-NEXT:    ret <2 x i31> [[R]]
+;
+  %z = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 7, i31 38>)
+  %r = and <2 x i31> %z, <i31 63, i31 31>
+  ret <2 x i31> %r
+}
+
 ; Demand bits from both operands -- cannot simplify.
 
 define i32 @fshl_both_ops_demanded(i32 %x, i32 %y) {
@@ -325,11 +348,7 @@ define i33 @fshr_both_ops_demanded(i33 %x, i33 %y) {
 
 define i32 @fshl_known_bits(i32 %x, i32 %y) {
 ; CHECK-LABEL: @fshl_known_bits(
-; CHECK-NEXT:    [[X2:%.*]] = or i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[Y2:%.*]] = lshr i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[Z:%.*]] = call i32 @llvm.fshl.i32(i32 [[X2]], i32 [[Y2]], i32 7)
-; CHECK-NEXT:    [[R:%.*]] = and i32 [[Z]], 192
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    ret i32 128
 ;
   %x2 = or i32 %x, 1 ; lo bit set
   %y2 = lshr i32 %y, 1 ; hi bit clear
@@ -340,11 +359,7 @@ define i32 @fshl_known_bits(i32 %x, i32 %y) {
 
 define i33 @fshr_known_bits(i33 %x, i33 %y) {
 ; CHECK-LABEL: @fshr_known_bits(
-; CHECK-NEXT:    [[X2:%.*]] = or i33 [[X:%.*]], 1
-; CHECK-NEXT:    [[Y2:%.*]] = lshr i33 [[Y:%.*]], 1
-; CHECK-NEXT:    [[Z:%.*]] = call i33 @llvm.fshr.i33(i33 [[X2]], i33 [[Y2]], i33 26)
-; CHECK-NEXT:    [[R:%.*]] = and i33 [[Z]], 192
-; CHECK-NEXT:    ret i33 [[R]]
+; CHECK-NEXT:    ret i33 128
 ;
   %x2 = or i33 %x, 1 ; lo bit set
   %y2 = lshr i33 %y, 1 ; hi bit set
```
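
For the two known-bits tests that now fold to a constant, the reasoning can be spot-checked outside of LLVM. The sketch below is an illustration only: it emulates `@llvm.fshl.i32` with ordinary 32-bit arithmetic and feeds it xorshift-generated sample inputs (the helper names and the sample count are invented here). It checks the i32 case: with the low bit of the first operand set and the high bit of the second operand clear, `fshl(x2, y2, 7) & 192` is always 128, which is exactly what the updated CHECK line expects.

```cpp
// Spot-check of the fold exercised by the updated fshl_known_bits test.
#include <cstdint>
#include <cstdio>

// Emulate @llvm.fshl.i32 with plain 32-bit arithmetic.
static uint32_t fshl32(uint32_t X, uint32_t Y, unsigned S) {
  S %= 32;                               // funnel-shift amount is modular
  if (S == 0) return X;                  // avoid the UB of a 32-bit shift by 32
  return (X << S) | (Y >> (32 - S));
}

int main() {
  uint32_t Seed = 0x12345678;
  for (int I = 0; I < 1000000; ++I) {
    // xorshift32 pseudo-random values for x and y.
    Seed ^= Seed << 13; Seed ^= Seed >> 17; Seed ^= Seed << 5;
    uint32_t X = Seed;
    Seed ^= Seed << 13; Seed ^= Seed >> 17; Seed ^= Seed << 5;
    uint32_t Y = Seed;

    uint32_t X2 = X | 1;                 // lo bit set
    uint32_t Y2 = Y >> 1;                // hi bit clear
    uint32_t R = fshl32(X2, Y2, 7) & 192;
    if (R != 128) {
      printf("counterexample: x=%u y=%u r=%u\n", (unsigned)X, (unsigned)Y, (unsigned)R);
      return 1;
    }
  }
  printf("fshl_known_bits fold holds on all sampled inputs\n");
  return 0;
}
```

Bit 7 of the funnel-shifted value is the known-one low bit of x2 and bit 6 is the known-zero high bit of y2, so masking with 192 can only ever produce 128; the program prints a confirmation (or a counterexample, which should never occur).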

