author | Sanjay Patel <spatel@rotateright.com> | 2018-11-13 22:47:24 +0000
committer | Sanjay Patel <spatel@rotateright.com> | 2018-11-13 22:47:24 +0000
commit | f8f12272e80fd37f6ee48b7ffef9d7296dcd2e33 (patch)
tree | 259dcf63ddac790b771d072158e2470d6744dd8c /llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
parent | e19dc6137fadf44aac0385357e3169350a2061c0 (diff)
[InstCombine] canonicalize rotate patterns with cmp/select
The cmp+branch variant of this pattern is shown in:
https://bugs.llvm.org/show_bug.cgi?id=34924
...and as discussed there, we probably can't transform
that without a rotate intrinsic. We do have that now
via funnel shift, but we're not quite ready to
canonicalize IR to that form yet. The variant that uses a
'select', however, can already be transformed safely, and
that is what this patch does.
The sequence with negation followed by masking is what we
use in the backend and partly in clang (though that part
should be updated).
https://rise4fun.com/Alive/TplC
%cmp = icmp eq i32 %shamt, 0
%sub = sub i32 32, %shamt
%shr = lshr i32 %x, %shamt
%shl = shl i32 %x, %sub
%or = or i32 %shr, %shl
%r = select i1 %cmp, i32 %x, i32 %or
=>
%neg = sub i32 0, %shamt
%masked = and i32 %shamt, 31
%maskedneg = and i32 %neg, 31
%shr2 = lshr i32 %x, %masked
%shl2 = shl i32 %x, %maskedneg
%r = or i32 %shr2, %shl2
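As an aside (not part of the original commit message), the proof above corresponds to the following source-level idioms. This is only a sketch of the equivalence, with made-up helper names rotr32_select and rotr32_masked; it is not code from the patch:

```cpp
#include <cassert>
#include <cstdint>

// Select-guarded rotate-right: the pattern this fold matches. The s == 0
// check exists only to avoid the undefined shift by 32 in the other arm.
static uint32_t rotr32_select(uint32_t x, uint32_t s) {
  return s == 0 ? x : (x >> s) | (x << (32 - s));
}

// Bitmasked rotate-right: the form the fold produces. Masking keeps both
// shift amounts in [0, 31], so no guard against s == 0 is needed.
static uint32_t rotr32_masked(uint32_t x, uint32_t s) {
  return (x >> (s & 31)) | (x << (-s & 31));
}

int main() {
  for (uint32_t s = 0; s < 32; ++s)
    assert(rotr32_select(0x12345678u, s) == rotr32_masked(0x12345678u, s));
}
```

The masked form has no compare, select, or shift amount that can reach the bit width, which is why it is preferred as the canonical form until funnel-shift intrinsics take over.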
llvm-svn: 346807
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 63 |
1 file changed, 63 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 88a72bb8eb5..26d0b522f01 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1546,6 +1546,66 @@ static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,
   return SelectInst::Create(CmpABC, MinMaxOp, ThirdOp);
 }

+/// Try to reduce a rotate pattern that includes a compare and select into a
+/// sequence of ALU ops only. Example:
+/// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b)))
+///              --> (a >> (-b & 31)) | (a << (b & 31))
+static Instruction *foldSelectRotate(SelectInst &Sel,
+                                     InstCombiner::BuilderTy &Builder) {
+  // The false value of the select must be a rotate of the true value.
+  Value *Or0, *Or1;
+  if (!match(Sel.getFalseValue(), m_OneUse(m_Or(m_Value(Or0), m_Value(Or1)))))
+    return nullptr;
+
+  Value *TVal = Sel.getTrueValue();
+  Value *SA0, *SA1;
+  if (!match(Or0, m_OneUse(m_LogicalShift(m_Specific(TVal), m_Value(SA0)))) ||
+      !match(Or1, m_OneUse(m_LogicalShift(m_Specific(TVal), m_Value(SA1)))))
+    return nullptr;
+
+  auto ShiftOpcode0 = cast<BinaryOperator>(Or0)->getOpcode();
+  auto ShiftOpcode1 = cast<BinaryOperator>(Or1)->getOpcode();
+  if (ShiftOpcode0 == ShiftOpcode1)
+    return nullptr;
+
+  // We have one of these patterns so far:
+  // select ?, TVal, (or (lshr TVal, SA0), (shl TVal, SA1))
+  // select ?, TVal, (or (shl TVal, SA0), (lshr TVal, SA1))
+  // This must be a power-of-2 rotate for a bitmasking transform to be valid.
+  unsigned Width = Sel.getType()->getScalarSizeInBits();
+  if (!isPowerOf2_32(Width))
+    return nullptr;
+
+  // Check the shift amounts to see if they are an opposite pair.
+  Value *ShAmt;
+  if (match(SA1, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(SA0)))))
+    ShAmt = SA0;
+  else if (match(SA0, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(SA1)))))
+    ShAmt = SA1;
+  else
+    return nullptr;
+
+  // Finally, see if the select is filtering out a shift-by-zero.
+  Value *Cond = Sel.getCondition();
+  ICmpInst::Predicate Pred;
+  if (!match(Cond, m_OneUse(m_ICmp(Pred, m_Specific(ShAmt), m_ZeroInt()))) ||
+      Pred != ICmpInst::ICMP_EQ)
+    return nullptr;
+
+  // This is a rotate that avoids shift-by-bitwidth UB in a suboptimal way.
+  // Convert to safely bitmasked shifts.
+  // TODO: When we can canonicalize to funnel shift intrinsics without risk of
+  //       performance regressions, replace this sequence with that call.
+  Value *NegShAmt = Builder.CreateNeg(ShAmt);
+  Value *MaskedShAmt = Builder.CreateAnd(ShAmt, Width - 1);
+  Value *MaskedNegShAmt = Builder.CreateAnd(NegShAmt, Width - 1);
+  Value *NewSA0 = ShAmt == SA0 ? MaskedShAmt : MaskedNegShAmt;
+  Value *NewSA1 = ShAmt == SA1 ? MaskedShAmt : MaskedNegShAmt;
+  Value *NewSh0 = Builder.CreateBinOp(ShiftOpcode0, TVal, NewSA0);
+  Value *NewSh1 = Builder.CreateBinOp(ShiftOpcode1, TVal, NewSA1);
+  return BinaryOperator::CreateOr(NewSh0, NewSh1);
+}
+
 Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   Value *CondVal = SI.getCondition();
   Value *TrueVal = SI.getTrueValue();
@@ -2010,5 +2070,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   if (Instruction *Select = foldSelectBinOpIdentity(SI, TLI))
     return Select;

+  if (Instruction *Rot = foldSelectRotate(SI, Builder))
+    return Rot;
+
   return nullptr;
 }
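One detail of foldSelectRotate above that is easy to miss: the isPowerOf2_32(Width) check is what makes the bitmasking rewrite sound, because (-s) & (Width - 1) only equals (Width - s) mod Width when Width - 1 is an all-ones mask. A minimal standalone demonstration of this arithmetic (my own sketch, not part of the patch):

```cpp
#include <cstdio>

// For a power-of-2 width, masking by (Width - 1) is reduction mod Width,
// so (-s) & (Width - 1) reproduces the (Width - s) shift amount.
// For a non-power-of-2 width the mask is not all-ones and the identity breaks.
int main() {
  unsigned s = 5;
  std::printf("W=32: (-s) & 31 = %u, 32 - s = %u\n", -s & 31u, 32u - s); // 27 == 27
  std::printf("W=12: (-s) & 11 = %u, 12 - s = %u\n", -s & 11u, 12u - s); // 11 != 7
}
```

The same reasoning is why Width - 1 works as the mask in the Builder.CreateAnd calls: for legal power-of-2 scalar and vector element widths, masking and reduction mod Width coincide.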