-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp  21
-rw-r--r--  llvm/test/Transforms/InstCombine/rotate.ll              28
-rw-r--r--  llvm/test/Transforms/PhaseOrdering/rotate.ll            10
3 files changed, 16 insertions(+), 43 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 19858ae149a..b1e0ffa6fa7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1547,9 +1547,9 @@ static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,
 }
 
 /// Try to reduce a rotate pattern that includes a compare and select into a
-/// sequence of ALU ops only. Example:
+/// funnel shift intrinsic. Example:
 /// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b)))
-/// --> (a >> (-b & 31)) | (a << (b & 31))
+/// --> call llvm.fshl.i32(a, a, b)
 static Instruction *foldSelectRotate(SelectInst &Sel,
                                      InstCombiner::BuilderTy &Builder) {
   // The false value of the select must be a rotate of the true value.
@@ -1593,17 +1593,12 @@ static Instruction *foldSelectRotate(SelectInst &Sel,
     return nullptr;
 
   // This is a rotate that avoids shift-by-bitwidth UB in a suboptimal way.
-  // Convert to safely bitmasked shifts.
-  // TODO: When we can canonicalize to funnel shift intrinsics without risk of
-  //       performance regressions, replace this sequence with that call.
-  Value *NegShAmt = Builder.CreateNeg(ShAmt);
-  Value *MaskedShAmt = Builder.CreateAnd(ShAmt, Width - 1);
-  Value *MaskedNegShAmt = Builder.CreateAnd(NegShAmt, Width - 1);
-  Value *NewSA0 = ShAmt == SA0 ? MaskedShAmt : MaskedNegShAmt;
-  Value *NewSA1 = ShAmt == SA1 ? MaskedShAmt : MaskedNegShAmt;
-  Value *NewSh0 = Builder.CreateBinOp(ShiftOpcode0, TVal, NewSA0);
-  Value *NewSh1 = Builder.CreateBinOp(ShiftOpcode1, TVal, NewSA1);
-  return BinaryOperator::CreateOr(NewSh0, NewSh1);
+  // Convert to funnel shift intrinsic.
+  bool IsFshl = (ShAmt == SA0 && ShiftOpcode0 == BinaryOperator::Shl) ||
+                (ShAmt == SA1 && ShiftOpcode1 == BinaryOperator::Shl);
+  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
+  Function *F = Intrinsic::getDeclaration(Sel.getModule(), IID, Sel.getType());
+  return IntrinsicInst::Create(F, { TVal, TVal, ShAmt });
 }
 
 Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
index 492817e47e2..2da7fb48393 100644
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -700,12 +700,7 @@ define i9 @rotateleft_9_neg_mask_wide_amount_commute(i9 %v, i33 %shamt) {
 
 define i32 @rotr_select(i32 %x, i32 %shamt) {
 ; CHECK-LABEL: @rotr_select(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 0, [[SHAMT:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[SHAMT]], 31
-; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP1]], 31
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[X:%.*]], [[TMP2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shl i32 [[X]], [[TMP3]]
-; CHECK-NEXT:    [[R:%.*]] = or i32 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[SHAMT:%.*]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %cmp = icmp eq i32 %shamt, 0
@@ -721,12 +716,7 @@ define i32 @rotr_select(i32 %x, i32 %shamt) {
 
 define i8 @rotr_select_commute(i8 %x, i8 %shamt) {
 ; CHECK-LABEL: @rotr_select_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i8 0, [[SHAMT:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[SHAMT]], 7
-; CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP1]], 7
-; CHECK-NEXT:    [[TMP4:%.*]] = shl i8 [[X:%.*]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = lshr i8 [[X]], [[TMP2]]
-; CHECK-NEXT:    [[R:%.*]] = or i8 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[SHAMT:%.*]])
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %cmp = icmp eq i8 %shamt, 0
@@ -742,12 +732,7 @@ define i8 @rotr_select_commute(i8 %x, i8 %shamt) {
 
 define i16 @rotl_select(i16 %x, i16 %shamt) {
 ; CHECK-LABEL: @rotl_select(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i16 0, [[SHAMT:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[SHAMT]], 15
-; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP1]], 15
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr i16 [[X:%.*]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shl i16 [[X]], [[TMP2]]
-; CHECK-NEXT:    [[R:%.*]] = or i16 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[SHAMT:%.*]])
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
   %cmp = icmp eq i16 %shamt, 0
@@ -763,12 +748,7 @@ define i16 @rotl_select(i16 %x, i16 %shamt) {
 
 define <2 x i64> @rotl_select_commute(<2 x i64> %x, <2 x i64> %shamt) {
 ; CHECK-LABEL: @rotl_select_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i64> zeroinitializer, [[SHAMT:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[SHAMT]], <i64 63, i64 63>
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP1]], <i64 63, i64 63>
-; CHECK-NEXT:    [[TMP4:%.*]] = shl <2 x i64> [[X:%.*]], [[TMP2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = lshr <2 x i64> [[X]], [[TMP3]]
-; CHECK-NEXT:    [[R:%.*]] = or <2 x i64> [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[X:%.*]], <2 x i64> [[X]], <2 x i64> [[SHAMT:%.*]])
 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 ;
   %cmp = icmp eq <2 x i64> %shamt, zeroinitializer
diff --git a/llvm/test/Transforms/PhaseOrdering/rotate.ll b/llvm/test/Transforms/PhaseOrdering/rotate.ll
index 190807668ee..e10a46cb830 100644
--- a/llvm/test/Transforms/PhaseOrdering/rotate.ll
+++ b/llvm/test/Transforms/PhaseOrdering/rotate.ll
@@ -5,6 +5,9 @@
 ; This should become a single funnel shift through a combination
 ; of aggressive-instcombine, simplifycfg, and instcombine.
 ; https://bugs.llvm.org/show_bug.cgi?id=34924
+; These are equivalent, but the value name with the new-pm shows a bug -
+; this code should not have been converted to a speculative select with
+; an intermediate transform.
 
 define i32 @rotl(i32 %a, i32 %b) {
 ; OLDPM-LABEL: @rotl(
@@ -14,12 +17,7 @@ define i32 @rotl(i32 %a, i32 %b) {
 ;
 ; NEWPM-LABEL: @rotl(
 ; NEWPM-NEXT:  entry:
-; NEWPM-NEXT:    [[TMP0:%.*]] = sub i32 0, [[B:%.*]]
-; NEWPM-NEXT:    [[TMP1:%.*]] = and i32 [[B]], 31
-; NEWPM-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0]], 31
-; NEWPM-NEXT:    [[TMP3:%.*]] = lshr i32 [[A:%.*]], [[TMP2]]
-; NEWPM-NEXT:    [[TMP4:%.*]] = shl i32 [[A]], [[TMP1]]
-; NEWPM-NEXT:    [[SPEC_SELECT:%.*]] = or i32 [[TMP3]], [[TMP4]]
+; NEWPM-NEXT:    [[SPEC_SELECT:%.*]] = tail call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B:%.*]])
 ; NEWPM-NEXT:    ret i32 [[SPEC_SELECT]]
 ;
 entry:
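
Not part of the commit, but a minimal standalone illustration of the fold may help: the IR below is hand-written (the @rotr32 name and value names are illustrative, not taken from the test file) and shows the guarded rotate-right idiom that foldSelectRotate matches, essentially the same shape as @rotr_select above.

; Guarded rotate-right: the select filters out the shift-by-bitwidth case.
define i32 @rotr32(i32 %x, i32 %shamt) {
  %cmp = icmp eq i32 %shamt, 0    ; shift-by-zero guard
  %sub = sub i32 32, %shamt       ; opposite shift amount: Width - %shamt
  %shr = lshr i32 %x, %shamt
  %shl = shl i32 %x, %sub
  %or = or i32 %shr, %shl
  %r = select i1 %cmp, i32 %x, i32 %or
  ret i32 %r
}
; With this patch, `opt -instcombine -S` reduces the body to roughly:
;   %r = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %shamt)
;   ret i32 %r
; matching the updated CHECK lines for @rotr_select in rotate.ll above.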

