-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp  54
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp     5
-rw-r--r--  llvm/test/Transforms/InstCombine/rotate.ll               33
3 files changed, 61 insertions, 31 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 24a82ba9ae4..404c2ad7e6e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1810,6 +1810,57 @@ Instruction *InstCombiner::matchBSwap(BinaryOperator &Or) {
   return LastInst;
 }
 
+/// Transform UB-safe variants of bitwise rotate to the funnel shift intrinsic.
+static Instruction *matchRotate(Instruction &Or) {
+  // TODO: Can we reduce the code duplication between this and the related
+  // rotate matching code under visitSelect and visitTrunc?
+  unsigned Width = Or.getType()->getScalarSizeInBits();
+  if (!isPowerOf2_32(Width))
+    return nullptr;
+
+  // First, find an or'd pair of opposite shifts with the same shifted operand:
+  // or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1)
+  Value *Or0 = Or.getOperand(0), *Or1 = Or.getOperand(1);
+  Value *ShVal, *ShAmt0, *ShAmt1;
+  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal), m_Value(ShAmt0)))) ||
+      !match(Or1, m_OneUse(m_LogicalShift(m_Specific(ShVal), m_Value(ShAmt1)))))
+    return nullptr;
+
+  auto ShiftOpcode0 = cast<BinaryOperator>(Or0)->getOpcode();
+  auto ShiftOpcode1 = cast<BinaryOperator>(Or1)->getOpcode();
+  if (ShiftOpcode0 == ShiftOpcode1)
+    return nullptr;
+
+  // Match the shift amount operands for a rotate pattern. This always matches
+  // a subtraction on the R operand.
+  auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * {
+    // The shift amount may be masked with negation:
+    // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
+    Value *X;
+    unsigned Mask = Width - 1;
+    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
+        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
+      return X;
+
+    return nullptr;
+  };
+
+  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
+  bool SubIsOnLHS = false;
+  if (!ShAmt) {
+    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
+    SubIsOnLHS = true;
+  }
+  if (!ShAmt)
+    return nullptr;
+
+  bool IsFshl = (!SubIsOnLHS && ShiftOpcode0 == BinaryOperator::Shl) ||
+                (SubIsOnLHS && ShiftOpcode1 == BinaryOperator::Shl);
+  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
+  Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
+  return IntrinsicInst::Create(F, { ShVal, ShVal, ShAmt });
+}
+
 /// If all elements of two constant vectors are 0/-1 and inverses, return true.
 static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
   unsigned NumElts = C1->getType()->getVectorNumElements();
@@ -2170,6 +2221,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   if (Instruction *BSwap = matchBSwap(I))
     return BSwap;
 
+  if (Instruction *Rotate = matchRotate(I))
+    return Rotate;
+
   Value *X, *Y;
   const APInt *CV;
   if (match(&I, m_c_Or(m_OneUse(m_Xor(m_Value(X), m_APInt(CV))), m_Value(Y))) &&
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index ebbe2afe3ec..faf58a08976 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1557,8 +1557,7 @@ static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,
 /// funnel shift intrinsic. Example:
 /// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b)))
 ///              --> call llvm.fshl.i32(a, a, b)
-static Instruction *foldSelectRotate(SelectInst &Sel,
-                                     InstCombiner::BuilderTy &Builder) {
+static Instruction *foldSelectRotate(SelectInst &Sel) {
   // The false value of the select must be a rotate of the true value.
   Value *Or0, *Or1;
   if (!match(Sel.getFalseValue(), m_OneUse(m_Or(m_Value(Or0), m_Value(Or1)))))
@@ -2047,7 +2046,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   if (Instruction *Select = foldSelectBinOpIdentity(SI, TLI))
     return Select;
 
-  if (Instruction *Rot = foldSelectRotate(SI, Builder))
+  if (Instruction *Rot = foldSelectRotate(SI))
     return Rot;
 
   return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
index cc6735fe393..4b516003a9f 100644
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -213,12 +213,7 @@ define <3 x i42> @rotr_v3i42(<3 x i42> %x, <3 x i42> %y) {
 
 define i32 @rotl_safe_i32(i32 %x, i32 %y) {
 ; CHECK-LABEL: @rotl_safe_i32(
-; CHECK-NEXT:    [[NEGY:%.*]] = sub i32 0, [[Y:%.*]]
-; CHECK-NEXT:    [[YMASK:%.*]] = and i32 [[Y]], 31
-; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i32 [[NEGY]], 31
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[YMASK]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[NEGYMASK]]
-; CHECK-NEXT:    [[R:%.*]] = or i32 [[SHR]], [[SHL]]
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %negy = sub i32 0, %y
@@ -236,12 +231,9 @@ define i32 @rotl_safe_i32(i32 %x, i32 %y) {
 
 define i16 @rotl_safe_i16_commute_extra_use(i16 %x, i16 %y, i16* %p) {
 ; CHECK-LABEL: @rotl_safe_i16_commute_extra_use(
 ; CHECK-NEXT:    [[NEGY:%.*]] = sub i16 0, [[Y:%.*]]
-; CHECK-NEXT:    [[YMASK:%.*]] = and i16 [[Y]], 15
 ; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i16 [[NEGY]], 15
 ; CHECK-NEXT:    store i16 [[NEGYMASK]], i16* [[P:%.*]], align 2
-; CHECK-NEXT:    [[SHL:%.*]] = shl i16 [[X:%.*]], [[YMASK]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i16 [[X]], [[NEGYMASK]]
-; CHECK-NEXT:    [[R:%.*]] = or i16 [[SHL]], [[SHR]]
+; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y]])
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
   %negy = sub i16 0, %y
@@ -259,12 +251,7 @@ define i16 @rotl_safe_i16_commute_extra_use(i16 %x, i16 %y, i16* %p) {
 
 define i64 @rotr_safe_i64(i64 %x, i64 %y) {
 ; CHECK-LABEL: @rotr_safe_i64(
-; CHECK-NEXT:    [[NEGY:%.*]] = sub i64 0, [[Y:%.*]]
-; CHECK-NEXT:    [[YMASK:%.*]] = and i64 [[Y]], 63
-; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i64 [[NEGY]], 63
-; CHECK-NEXT:    [[SHL:%.*]] = shl i64 [[X:%.*]], [[NEGYMASK]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[X]], [[YMASK]]
-; CHECK-NEXT:    [[R:%.*]] = or i64 [[SHR]], [[SHL]]
+; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
 ; CHECK-NEXT:    ret i64 [[R]]
 ;
   %negy = sub i64 0, %y
@@ -305,12 +292,7 @@ define i8 @rotr_safe_i8_commute_extra_use(i8 %x, i8 %y, i8* %p) {
 
 define <2 x i32> @rotl_safe_v2i32(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @rotl_safe_v2i32(
-; CHECK-NEXT:    [[NEGY:%.*]] = sub <2 x i32> zeroinitializer, [[Y:%.*]]
-; CHECK-NEXT:    [[YMASK:%.*]] = and <2 x i32> [[Y]], <i32 31, i32 31>
-; CHECK-NEXT:    [[NEGYMASK:%.*]] = and <2 x i32> [[NEGY]], <i32 31, i32 31>
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], [[YMASK]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i32> [[X]], [[NEGYMASK]]
-; CHECK-NEXT:    [[R:%.*]] = or <2 x i32> [[SHR]], [[SHL]]
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> [[Y:%.*]])
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %negy = sub <2 x i32> zeroinitializer, %y
@@ -327,12 +309,7 @@ define <2 x i32> @rotl_safe_v2i32(<2 x i32> %x, <2 x i32> %y) {
 
 define <3 x i16> @rotr_safe_v3i16(<3 x i16> %x, <3 x i16> %y) {
 ; CHECK-LABEL: @rotr_safe_v3i16(
-; CHECK-NEXT:    [[NEGY:%.*]] = sub <3 x i16> zeroinitializer, [[Y:%.*]]
-; CHECK-NEXT:    [[YMASK:%.*]] = and <3 x i16> [[Y]], <i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[NEGYMASK:%.*]] = and <3 x i16> [[NEGY]], <i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i16> [[X:%.*]], [[NEGYMASK]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <3 x i16> [[X]], [[YMASK]]
-; CHECK-NEXT:    [[R:%.*]] = or <3 x i16> [[SHR]], [[SHL]]
+; CHECK-NEXT:    [[R:%.*]] = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> [[X:%.*]], <3 x i16> [[X]], <3 x i16> [[Y:%.*]])
 ; CHECK-NEXT:    ret <3 x i16> [[R]]
 ;
   %negy = sub <3 x i16> zeroinitializer, %y
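For context, the "UB-safe" rotate that matchRotate recognizes is the masked-shift idiom exercised by the tests above: both shift amounts are reduced modulo the bit width, so a count of 0 can never produce an out-of-range (undefined-behavior) shift. A minimal C++ sketch of that source-level idiom, illustrative only and not part of this commit:

#include <cstdint>

// Rotate-left written with masked shift amounts. For n == 0 this computes
// (x << 0) | (x >> 0) == x, so neither shift count can reach the bit width.
// The IR for this body is essentially the or-of-opposite-shifts pattern in
// the rotl_safe_i32 test above, which InstCombine now folds to llvm.fshl.i32.
uint32_t rotl_safe(uint32_t x, uint32_t n) {
  return (x << (n & 31)) | (x >> (-n & 31));
}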

