-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp  21
-rw-r--r--  llvm/test/Transforms/InstCombine/rotate.ll              28
-rw-r--r--  llvm/test/Transforms/PhaseOrdering/rotate.ll            10
3 files changed, 16 insertions(+), 43 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 19858ae149a..b1e0ffa6fa7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1547,9 +1547,9 @@ static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,
 }
 
 /// Try to reduce a rotate pattern that includes a compare and select into a
-/// sequence of ALU ops only. Example:
+/// funnel shift intrinsic. Example:
 /// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b)))
-/// --> (a >> (-b & 31)) | (a << (b & 31))
+/// --> call llvm.fshl.i32(a, a, b)
 static Instruction *foldSelectRotate(SelectInst &Sel,
                                      InstCombiner::BuilderTy &Builder) {
   // The false value of the select must be a rotate of the true value.
@@ -1593,17 +1593,12 @@ static Instruction *foldSelectRotate(SelectInst &Sel,
     return nullptr;
 
   // This is a rotate that avoids shift-by-bitwidth UB in a suboptimal way.
-  // Convert to safely bitmasked shifts.
-  // TODO: When we can canonicalize to funnel shift intrinsics without risk of
-  //       performance regressions, replace this sequence with that call.
-  Value *NegShAmt = Builder.CreateNeg(ShAmt);
-  Value *MaskedShAmt = Builder.CreateAnd(ShAmt, Width - 1);
-  Value *MaskedNegShAmt = Builder.CreateAnd(NegShAmt, Width - 1);
-  Value *NewSA0 = ShAmt == SA0 ? MaskedShAmt : MaskedNegShAmt;
-  Value *NewSA1 = ShAmt == SA1 ? MaskedShAmt : MaskedNegShAmt;
-  Value *NewSh0 = Builder.CreateBinOp(ShiftOpcode0, TVal, NewSA0);
-  Value *NewSh1 = Builder.CreateBinOp(ShiftOpcode1, TVal, NewSA1);
-  return BinaryOperator::CreateOr(NewSh0, NewSh1);
+  // Convert to funnel shift intrinsic.
+  bool IsFshl = (ShAmt == SA0 && ShiftOpcode0 == BinaryOperator::Shl) ||
+                (ShAmt == SA1 && ShiftOpcode1 == BinaryOperator::Shl);
+  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
+  Function *F = Intrinsic::getDeclaration(Sel.getModule(), IID, Sel.getType());
+  return IntrinsicInst::Create(F, { TVal, TVal, ShAmt });
 }
 
 Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
index 492817e47e2..2da7fb48393 100644
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -700,12 +700,7 @@ define i9 @rotateleft_9_neg_mask_wide_amount_commute(i9 %v, i33 %shamt) {
 
 define i32 @rotr_select(i32 %x, i32 %shamt) {
 ; CHECK-LABEL: @rotr_select(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 0, [[SHAMT:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[SHAMT]], 31
-; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP1]], 31
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[X:%.*]], [[TMP2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shl i32 [[X]], [[TMP3]]
-; CHECK-NEXT:    [[R:%.*]] = or i32 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[SHAMT:%.*]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %cmp = icmp eq i32 %shamt, 0
@@ -721,12 +716,7 @@ define i32 @rotr_select(i32 %x, i32 %shamt) {
 
 define i8 @rotr_select_commute(i8 %x, i8 %shamt) {
 ; CHECK-LABEL: @rotr_select_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i8 0, [[SHAMT:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[SHAMT]], 7
-; CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP1]], 7
-; CHECK-NEXT:    [[TMP4:%.*]] = shl i8 [[X:%.*]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = lshr i8 [[X]], [[TMP2]]
-; CHECK-NEXT:    [[R:%.*]] = or i8 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[SHAMT:%.*]])
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %cmp = icmp eq i8 %shamt, 0
@@ -742,12 +732,7 @@ define i8 @rotr_select_commute(i8 %x, i8 %shamt) {
 
 define i16 @rotl_select(i16 %x, i16 %shamt) {
 ; CHECK-LABEL: @rotl_select(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i16 0, [[SHAMT:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[SHAMT]], 15
-; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP1]], 15
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr i16 [[X:%.*]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shl i16 [[X]], [[TMP2]]
-; CHECK-NEXT:    [[R:%.*]] = or i16 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[SHAMT:%.*]])
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
   %cmp = icmp eq i16 %shamt, 0
@@ -763,12 +748,7 @@ define i16 @rotl_select(i16 %x, i16 %shamt) {
 
 define <2 x i64> @rotl_select_commute(<2 x i64> %x, <2 x i64> %shamt) {
 ; CHECK-LABEL: @rotl_select_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i64> zeroinitializer, [[SHAMT:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[SHAMT]], <i64 63, i64 63>
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP1]], <i64 63, i64 63>
-; CHECK-NEXT:    [[TMP4:%.*]] = shl <2 x i64> [[X:%.*]], [[TMP2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = lshr <2 x i64> [[X]], [[TMP3]]
-; CHECK-NEXT:    [[R:%.*]] = or <2 x i64> [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[X:%.*]], <2 x i64> [[X]], <2 x i64> [[SHAMT:%.*]])
 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 ;
   %cmp = icmp eq <2 x i64> %shamt, zeroinitializer
diff --git a/llvm/test/Transforms/PhaseOrdering/rotate.ll b/llvm/test/Transforms/PhaseOrdering/rotate.ll
index 190807668ee..e10a46cb830 100644
--- a/llvm/test/Transforms/PhaseOrdering/rotate.ll
+++ b/llvm/test/Transforms/PhaseOrdering/rotate.ll
@@ -5,6 +5,9 @@
 ; This should become a single funnel shift through a combination
 ; of aggressive-instcombine, simplifycfg, and instcombine.
 ; https://bugs.llvm.org/show_bug.cgi?id=34924
+; These are equivalent, but the value name with the new-pm shows a bug -
+; this code should not have been converted to a speculative select with
+; an intermediate transform.
 
 define i32 @rotl(i32 %a, i32 %b) {
 ; OLDPM-LABEL: @rotl(
@@ -14,12 +17,7 @@ define i32 @rotl(i32 %a, i32 %b) {
 ;
 ; NEWPM-LABEL: @rotl(
 ; NEWPM-NEXT:  entry:
-; NEWPM-NEXT:    [[TMP0:%.*]] = sub i32 0, [[B:%.*]]
-; NEWPM-NEXT:    [[TMP1:%.*]] = and i32 [[B]], 31
-; NEWPM-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0]], 31
-; NEWPM-NEXT:    [[TMP3:%.*]] = lshr i32 [[A:%.*]], [[TMP2]]
-; NEWPM-NEXT:    [[TMP4:%.*]] = shl i32 [[A]], [[TMP1]]
-; NEWPM-NEXT:    [[SPEC_SELECT:%.*]] = or i32 [[TMP3]], [[TMP4]]
+; NEWPM-NEXT:    [[SPEC_SELECT:%.*]] = tail call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B:%.*]])
 ; NEWPM-NEXT:    ret i32 [[SPEC_SELECT]]
 ;
 entry:
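
Not part of the commit, but a minimal standalone illustration of the fold may help: the IR below is hand-written (the @rotr32 name and value names are illustrative, not taken from the test file) and shows the guarded rotate-right idiom that foldSelectRotate matches, essentially the same shape as @rotr_select above.

; Guarded rotate-right: the select filters out the shift-by-bitwidth case.
define i32 @rotr32(i32 %x, i32 %shamt) {
  %cmp = icmp eq i32 %shamt, 0    ; shift-by-zero guard
  %sub = sub i32 32, %shamt       ; opposite shift amount: Width - %shamt
  %shr = lshr i32 %x, %shamt
  %shl = shl i32 %x, %sub
  %or = or i32 %shr, %shl
  %r = select i1 %cmp, i32 %x, i32 %or
  ret i32 %r
}
; With this patch, `opt -instcombine -S` reduces the body to roughly:
;   %r = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %shamt)
;   ret i32 %r
; matching the updated CHECK lines for @rotr_select in rotate.ll above.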

