diff options
author | Sanjay Patel <spatel@rotateright.com> | 2018-11-13 23:27:23 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2018-11-13 23:27:23 +0000 |
commit | a139564896f2d0d4dc4dc491c4b17c9f01975a26 (patch) | |
tree | 2a2f2ba388d4044d1e62ab0c1f77bcd238b09e5c | |
parent | e0c00718ec2312ce55c79340a2c986a99e279cf9 (diff) | |
download | bcm5719-llvm-a139564896f2d0d4dc4dc491c4b17c9f01975a26.tar.gz bcm5719-llvm-a139564896f2d0d4dc4dc491c4b17c9f01975a26.zip |
[InstCombine] fold funnel shift amount based on demanded bits
The shift amount of a funnel shift is modulo the scalar bitwidth:
http://llvm.org/docs/LangRef.html#llvm-fshl-intrinsic
...so when the bitwidth is a power-of-2, only the low log2(bitwidth) bits of
that operand are demanded, and we can use demanded bits analysis to simplify it.
This is another step towards canonicalizing {shift/shift/or} patterns to the
funnel shift intrinsics (fshl/fshr) in IR.
Differential Revision: https://reviews.llvm.org/D54478
llvm-svn: 346814
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 14 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/fsh.ll | 24 |
2 files changed, 24 insertions, 14 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index fae47ec93b9..a99eaf013e6 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1990,6 +1990,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return I; break; + case Intrinsic::fshl: + case Intrinsic::fshr: { + // The shift amount (operand 2) of a funnel shift is modulo the bitwidth, + // so only the low bits of the shift amount are demanded if the bitwidth is + // a power-of-2. + unsigned BitWidth = II->getType()->getScalarSizeInBits(); + if (!isPowerOf2_32(BitWidth)) + break; + APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth)); + KnownBits Op2Known(BitWidth); + if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known)) + return &CI; + break; + } case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: case Intrinsic::umul_with_overflow: diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll index 3c28fa98f58..2e090199475 100644 --- a/llvm/test/Transforms/InstCombine/fsh.ll +++ b/llvm/test/Transforms/InstCombine/fsh.ll @@ -10,9 +10,7 @@ declare <2 x i31> @llvm.fshl.v2i31(<2 x i31>, <2 x i31>, <2 x i31>) define i32 @fshl_mask_simplify1(i32 %x, i32 %y, i32 %sh) { ; CHECK-LABEL: @fshl_mask_simplify1( -; CHECK-NEXT: [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 32 -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]]) -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: ret i32 [[X:%.*]] ; %maskedsh = and i32 %sh, 32 %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh) @@ -21,9 +19,7 @@ define i32 @fshl_mask_simplify1(i32 %x, i32 %y, i32 %sh) { define <2 x i32> @fshr_mask_simplify2(<2 x i32> %x, <2 x i32> %y, <2 x i32> %sh) { ; CHECK-LABEL: @fshr_mask_simplify2( -; CHECK-NEXT: [[MASKEDSH:%.*]] = and <2 x i32> [[SH:%.*]], <i32 64, i32 64> -; 
CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[MASKEDSH]]) -; CHECK-NEXT: ret <2 x i32> [[R]] +; CHECK-NEXT: ret <2 x i32> [[Y:%.*]] ; %maskedsh = and <2 x i32> %sh, <i32 64, i32 64> %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %maskedsh) @@ -43,7 +39,7 @@ define i32 @fshl_mask_simplify3(i32 %x, i32 %y, i32 %sh) { ret i32 %r } -; Check again with weird bitwidths; log2(33) means we demand the low 6 bits. +; Check again with weird bitwidths - the analysis is invalid with non-power-of-2. define i33 @fshr_mask_simplify1(i33 %x, i33 %y, i33 %sh) { ; CHECK-LABEL: @fshr_mask_simplify1( @@ -56,7 +52,7 @@ define i33 @fshr_mask_simplify1(i33 %x, i33 %y, i33 %sh) { ret i33 %r } -; Check again with weird bitwidths; log2(31) means we demand the low 5 bits. +; Check again with weird bitwidths - the analysis is invalid with non-power-of-2. define <2 x i31> @fshl_mask_simplify2(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh) { ; CHECK-LABEL: @fshl_mask_simplify2( @@ -69,7 +65,7 @@ define <2 x i31> @fshl_mask_simplify2(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh) ret <2 x i31> %r } -; Negative test. +; Check again with weird bitwidths - the analysis is invalid with non-power-of-2. 
define i33 @fshr_mask_simplify3(i33 %x, i33 %y, i33 %sh) { ; CHECK-LABEL: @fshr_mask_simplify3( @@ -86,8 +82,7 @@ define i33 @fshr_mask_simplify3(i33 %x, i33 %y, i33 %sh) { define i32 @fshl_mask_not_required(i32 %x, i32 %y, i32 %sh) { ; CHECK-LABEL: @fshl_mask_not_required( -; CHECK-NEXT: [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 31 -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]]) +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[SH:%.*]]) ; CHECK-NEXT: ret i32 [[R]] ; %maskedsh = and i32 %sh, 31 @@ -99,7 +94,7 @@ define i32 @fshl_mask_not_required(i32 %x, i32 %y, i32 %sh) { define i32 @fshl_mask_reduce_constant(i32 %x, i32 %y, i32 %sh) { ; CHECK-LABEL: @fshl_mask_reduce_constant( -; CHECK-NEXT: [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 33 +; CHECK-NEXT: [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 1 ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]]) ; CHECK-NEXT: ret i32 [[R]] ; @@ -125,8 +120,7 @@ define i32 @fshl_mask_negative(i32 %x, i32 %y, i32 %sh) { define <2 x i32> @fshr_set_but_not_demanded_vec(<2 x i32> %x, <2 x i32> %y, <2 x i32> %sh) { ; CHECK-LABEL: @fshr_set_but_not_demanded_vec( -; CHECK-NEXT: [[BOGUSBITS:%.*]] = or <2 x i32> [[SH:%.*]], <i32 32, i32 32> -; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[BOGUSBITS]]) +; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[SH:%.*]]) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %bogusbits = or <2 x i32> %sh, <i32 32, i32 32> @@ -134,6 +128,8 @@ define <2 x i32> @fshr_set_but_not_demanded_vec(<2 x i32> %x, <2 x i32> %y, <2 x ret <2 x i32> %r } +; Check again with weird bitwidths - the analysis is invalid with non-power-of-2. 
+ define <2 x i31> @fshl_set_but_not_demanded_vec(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh) { ; CHECK-LABEL: @fshl_set_but_not_demanded_vec( ; CHECK-NEXT: [[BOGUSBITS:%.*]] = or <2 x i31> [[SH:%.*]], <i31 32, i31 32> |