diff options
author | Roman Lebedev <lebedev.ri@gmail.com> | 2019-11-22 15:16:03 +0300 |
---|---|---|
committer | Roman Lebedev <lebedev.ri@gmail.com> | 2019-11-22 15:16:03 +0300 |
commit | 3f46022e33bd33b3d8f816be3c3adbe7de806119 (patch) | |
tree | c34bfe814234f3e0462b234b6e3c3404599effe1 /llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | |
parent | 06e03bce802e35a7401ab0849515ad1ce0dd21f5 (diff) | |
download | bcm5719-llvm-3f46022e33bd33b3d8f816be3c3adbe7de806119.tar.gz bcm5719-llvm-3f46022e33bd33b3d8f816be3c3adbe7de806119.zip |
[Codegen] TargetLowering::prepareUREMEqFold(): `x u% C1 ==/!= C2` with tautological C1 u<= C2 (PR35479)
Summary:
This is a preparatory cleanup before i add more
of this fold to deal with comparisons with non-zero.
In essence, the current lowering is:
```
Name: (X % C1) == 0 -> X * C3 <= C4
Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1
%zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, 0
=>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = -1 /u C1
%n0 = mul i8 %x, C3
%n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right
%n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n3 = or i8 %n1, %n2 ; rotate right
%r = icmp ule i8 %n3, %C4
```
https://rise4fun.com/Alive/oqd
It kinda just works, really no weird edge-cases.
But it isn't all that great for when comparing with non-zero.
In particular, given `(X % C1) == C2`, there will be problems
in the always-false tautological case where `C2 u>= C1`:
https://rise4fun.com/Alive/pH3
That case is tautological, always-false:
```
Name: (X % Y) u>= Y
%o0 = urem i8 %x, %y
%r = icmp uge i8 %o0, %y
=>
%r = false
```
https://rise4fun.com/Alive/ofu
While we can't/shouldn't get such tautological case normally,
we do deal with non-splat vectors, so unless we want to give up
in this case, we need to fixup/short-circuit such lanes.
There are two lowering variants:
1. We can blend between whatever computed result and the correct tautological result
```
Name: (X % C1) == C2 -> X * C3 <= C4 || false
Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1
%zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, C2
=>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = -1 /u C1
%n0 = mul i8 %x, C3
%n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right
%n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n3 = or i8 %n1, %n2 ; rotate right
%is_tautologically_false = icmp ule i8 C1, C2
%res = icmp ule i8 %n3, %C4
%r = select i1 %is_tautologically_false, i1 0, i1 %res
```
https://rise4fun.com/Alive/PjT5
https://rise4fun.com/Alive/1KV
2. We can invert the comparison result
```
Name: (X % C1) == C2 -> X * C3 <= C4 || false
Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1
%zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition
%o0 = urem i8 %x, C1
%r = icmp eq i8 %o0, C2
=>
%zz = and i8 C3, 0 ; and silence it from complaining about said reg
%C4 = -1 /u C1
%n0 = mul i8 %x, C3
%n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right
%n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right
%n3 = or i8 %n1, %n2 ; rotate right
%is_tautologically_false = icmp ule i8 C1, C2
%C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4
%res = icmp ule i8 %n3, %C4_fixed
%r = xor i1 %res, %is_tautologically_false
```
https://rise4fun.com/Alive/2xC
https://rise4fun.com/Alive/jpb5
3. We can expand into `and`/`or`:
https://rise4fun.com/Alive/WGn
https://rise4fun.com/Alive/lcb5
Blend-one is likely better since we avoid having to load the
replacement from constant pool. `xor` is second best since
it's still pretty general. I'm not adding `and`/`or` variants.
Reviewers: RKSimon, craig.topper, spatel
Reviewed By: RKSimon
Subscribers: nick, hiraditya, xbolva00, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70051
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 98 |
1 files changed, 73 insertions, 25 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 047cab5cb5f..9a9ac690aaa 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -4943,7 +4943,7 @@ SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL) const { - SmallVector<SDNode *, 2> Built; + SmallVector<SDNode *, 4> Built; if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond, DCI, DL, Built)) { for (SDNode *N : Built) @@ -4978,26 +4978,40 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, if (!isOperationLegalOrCustom(ISD::MUL, VT)) return SDValue(); - // TODO: Could support comparing with non-zero too. - ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); - if (!CompTarget || !CompTarget->isNullValue()) - return SDValue(); - - bool HadOneDivisor = false; - bool AllDivisorsAreOnes = true; + bool HadTautologicalLanes = false; + bool AllLanesAreTautological = true; bool HadEvenDivisor = false; bool AllDivisorsArePowerOfTwo = true; - SmallVector<SDValue, 16> PAmts, KAmts, QAmts; + bool HadTautologicalInvertedLanes = false; + SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts; - auto BuildUREMPattern = [&](ConstantSDNode *C) { + auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) { // Division by 0 is UB. Leave it to be constant-folded elsewhere. - if (C->isNullValue()) + if (CDiv->isNullValue()) return false; - const APInt &D = C->getAPIntValue(); - // If all divisors are ones, we will prefer to avoid the fold. - HadOneDivisor |= D.isOneValue(); - AllDivisorsAreOnes &= D.isOneValue(); + const APInt &D = CDiv->getAPIntValue(); + const APInt &Cmp = CCmp->getAPIntValue(); + + // x u% C1` is *always* less than C1. So given `x u% C1 == C2`, + // if C2 is not less than C1, the comparison is always false. + // But we will only be able to produce the comparison that will give the + // opposive tautological answer. So this lane would need to be fixed up. + bool TautologicalInvertedLane = D.ule(Cmp); + HadTautologicalInvertedLanes |= TautologicalInvertedLane; + + // If we are checking that remainder is something smaller than the divisor, + // then this comparison isn't tautological. For now this is not handled, + // other than the comparison that remainder is zero. + if (!Cmp.isNullValue() && !TautologicalInvertedLane) + return false; + + // If all lanes are tautological (either all divisors are ones, or divisor + // is not greater than the constant we are comparing with), + // we will prefer to avoid the fold. + bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane; + HadTautologicalLanes |= TautologicalLane; + AllLanesAreTautological &= TautologicalLane; // Decompose D into D0 * 2^K unsigned K = D.countTrailingZeros(); @@ -5025,13 +5039,14 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && "We are expecting that K is always less than all-ones for ShSVT"); - // If the divisor is 1 the result can be constant-folded. - if (D.isOneValue()) { + // If the lane is tautological the result can be constant-folded. + if (TautologicalLane) { // Set P and K amount to a bogus values so we can try to splat them. P = 0; K = -1; - assert(Q.isAllOnesValue() && - "Expecting all-ones comparison for one divisor"); + // And ensure that comparison constant is tautological, + // it will always compare true/false. + Q = -1; } PAmts.push_back(DAG.getConstant(P, DL, SVT)); @@ -5045,11 +5060,11 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue D = REMNode.getOperand(1); // Collect the values from each element. - if (!ISD::matchUnaryPredicate(D, BuildUREMPattern)) + if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern)) return SDValue(); - // If this is a urem by a one, avoid the fold since it can be constant-folded. - if (AllDivisorsAreOnes) + // If all lanes are tautological, the result can be constant-folded. + if (AllLanesAreTautological) return SDValue(); // If this is a urem by a powers-of-two, avoid the fold since it can be @@ -5059,7 +5074,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue PVal, KVal, QVal; if (VT.isVector()) { - if (HadOneDivisor) { + if (HadTautologicalLanes) { // Try to turn PAmts into a splat, since we don't care about the values // that are currently '0'. If we can't, just keep '0'`s. turnVectorIntoSplatVector(PAmts, isNullConstant); @@ -5096,8 +5111,41 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, } // UREM: (setule/setugt (rotr (mul N, P), K), Q) - return DAG.getSetCC(DL, SETCCVT, Op0, QVal, - ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); + SDValue NewCC = + DAG.getSetCC(DL, SETCCVT, Op0, QVal, + ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); + if (!HadTautologicalInvertedLanes) + return NewCC; + + // If any lanes previously compared always-false, the NewCC will give + // always-true result for them, so we need to fixup those lanes. + // Or the other way around for inequality predicate. + assert(VT.isVector() && "Can/should only get here for vectors."); + Created.push_back(NewCC.getNode()); + + // x u% C1` is *always* less than C1. So given `x u% C1 == C2`, + // if C2 is not less than C1, the comparison is always false. + // But we have produced the comparison that will give the + // opposive tautological answer. So these lanes would need to be fixed up. + SDValue TautologicalInvertedChannels = + DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE); + Created.push_back(TautologicalInvertedChannels.getNode()); + + if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) { + // If we have a vector select, let's replace the comparison results in the + // affected lanes with the correct tautological result. + SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true, + DL, SETCCVT, SETCCVT); + return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels, + Replacement, NewCC); + } + + // Else, we can just invert the comparison result in the appropriate lanes. + if (isOperationLegalOrCustom(ISD::XOR, SETCCVT)) + return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC, + TautologicalInvertedChannels); + + return SDValue(); // Don't know how to lower. } /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE |