summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
author Sanjay Patel <spatel@rotateright.com> 2019-01-04 17:38:12 +0000
committer Sanjay Patel <spatel@rotateright.com> 2019-01-04 17:38:12 +0000
commit 722466e1f176f22d2339acb750f93498964b07ee (patch)
tree cd2ea8ff9fe9c2aac8f4e0041e50448f096ac4a8 /llvm/lib/Transforms
parent 16a133bd8f860bcfa4c7ae9a3d937c92dca7d861 (diff)
download bcm5719-llvm-722466e1f176f22d2339acb750f93498964b07ee.tar.gz
download bcm5719-llvm-722466e1f176f22d2339acb750f93498964b07ee.zip
[InstCombine] reduce raw IR narrowing rotate patterns to funnel shift
Similar to rL350199 - there are no known analysis/codegen holes for funnel shift intrinsics now, so we can canonicalize the 6+ regular instructions to funnel shift to improve vectorization, inlining, unrolling, etc. llvm-svn: 350419
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 24
1 file changed, 8 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 7a8c762d494..1201ac196ec 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -492,7 +492,7 @@ static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC) {
}
/// Rotate left/right may occur in a wider type than necessary because of type
-/// promotion rules. Try to narrow all of the component instructions.
+/// promotion rules. Try to narrow the inputs and convert to funnel shift.
Instruction *InstCombiner::narrowRotate(TruncInst &Trunc) {
assert((isa<VectorType>(Trunc.getSrcTy()) ||
shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
@@ -563,23 +563,15 @@ Instruction *InstCombiner::narrowRotate(TruncInst &Trunc) {
// We have an unnecessarily wide rotate!
// trunc (or (lshr ShVal, ShAmt), (shl ShVal, BitWidth - ShAmt))
- // Narrow it down to eliminate the zext/trunc:
- // or (lshr trunc(ShVal), ShAmt0'), (shl trunc(ShVal), ShAmt1')
+ // Narrow the inputs and convert to funnel shift intrinsic:
+ // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt))
Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy);
- Value *NegShAmt = Builder.CreateNeg(NarrowShAmt);
-
- // Mask both shift amounts to ensure there's no UB from oversized shifts.
- Constant *MaskC = ConstantInt::get(DestTy, NarrowWidth - 1);
- Value *MaskedShAmt = Builder.CreateAnd(NarrowShAmt, MaskC);
- Value *MaskedNegShAmt = Builder.CreateAnd(NegShAmt, MaskC);
-
- // Truncate the original value and use narrow ops.
Value *X = Builder.CreateTrunc(ShVal, DestTy);
- Value *NarrowShAmt0 = SubIsOnLHS ? MaskedNegShAmt : MaskedShAmt;
- Value *NarrowShAmt1 = SubIsOnLHS ? MaskedShAmt : MaskedNegShAmt;
- Value *NarrowSh0 = Builder.CreateBinOp(ShiftOpcode0, X, NarrowShAmt0);
- Value *NarrowSh1 = Builder.CreateBinOp(ShiftOpcode1, X, NarrowShAmt1);
- return BinaryOperator::CreateOr(NarrowSh0, NarrowSh1);
+ bool IsFshl = (!SubIsOnLHS && ShiftOpcode0 == BinaryOperator::Shl) ||
+ (SubIsOnLHS && ShiftOpcode1 == BinaryOperator::Shl);
+ Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
+ Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
+ return IntrinsicInst::Create(F, { X, X, NarrowShAmt });
}
/// Try to narrow the width of math or bitwise logic instructions by pulling a
OpenPOWER on IntegriCloud