summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
diff options
context:
space:
mode:
authorRoman Lebedev <lebedev.ri@gmail.com>2019-11-22 15:16:03 +0300
committerRoman Lebedev <lebedev.ri@gmail.com>2019-11-22 15:16:03 +0300
commit3f46022e33bd33b3d8f816be3c3adbe7de806119 (patch)
treec34bfe814234f3e0462b234b6e3c3404599effe1 /llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
parent06e03bce802e35a7401ab0849515ad1ce0dd21f5 (diff)
downloadbcm5719-llvm-3f46022e33bd33b3d8f816be3c3adbe7de806119.tar.gz
bcm5719-llvm-3f46022e33bd33b3d8f816be3c3adbe7de806119.zip
[Codegen] TargetLowering::prepareUREMEqFold(): `x u% C1 ==/!= C2` with tautological C1 u<= C2 (PR35479)
Summary: This is a preparatory cleanup before i add more of this fold to deal with comparisons with non-zero. In essence, the current lowering is: ``` Name: (X % C1) == 0 -> X * C3 <= C4 Pre: (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, 0 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %r = icmp ule i8 %n3, %C4 ``` https://rise4fun.com/Alive/oqd It kinda just works, really no weird edge-cases. But it isn't all that great for when comparing with non-zero. In particular, given `(X % C1) == C2`, there will be problems in the always-false tautological case where `C2 u>= C1`: https://rise4fun.com/Alive/pH3 That case is tautological, always-false: ``` Name: (X % Y) u>= Y %o0 = urem i8 %x, %y %r = icmp uge i8 %o0, %y => %r = false ``` https://rise4fun.com/Alive/ofu While we can't/shouldn't get such tautological case normally, we do deal with non-splat vectors, so unless we want to give up in this case, we need to fixup/short-circuit such lanes. There are two lowering variants: 1. We can blend between whatever computed result and the correct tautological result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %res = icmp ule i8 %n3, %C4 %r = select i1 %is_tautologically_false, i1 0, i1 %res ``` https://rise4fun.com/Alive/PjT5 https://rise4fun.com/Alive/1KV 2. We can invert the comparison result ``` Name: (X % C1) == C2 -> X * C3 <= C4 || false Pre: (C2 == 0 || C1 u<= C2) && (C1 u>> countTrailingZeros(C1)) * C3 == 1 %zz = and i8 C3, 0 ; trick alive into making C3 avaliable in precondition %o0 = urem i8 %x, C1 %r = icmp eq i8 %o0, C2 => %zz = and i8 C3, 0 ; and silence it from complaining about said reg %C4 = -1 /u C1 %n0 = mul i8 %x, C3 %n1 = lshr i8 %n0, countTrailingZeros(C1) ; rotate right %n2 = shl i8 %n0, ((8-countTrailingZeros(C1)) %u 8) ; rotate right %n3 = or i8 %n1, %n2 ; rotate right %is_tautologically_false = icmp ule i8 C1, C2 %C4_fixed = select i1 %is_tautologically_false, i8 -1, i8 %C4 %res = icmp ule i8 %n3, %C4_fixed %r = xor i1 %res, %is_tautologically_false ``` https://rise4fun.com/Alive/2xC https://rise4fun.com/Alive/jpb5 3. We can expand into `and`/`or`: https://rise4fun.com/Alive/WGn https://rise4fun.com/Alive/lcb5 Blend-one is likely better since we avoid having to load the replacement from constant pool. `xor` is second best since it's still pretty general. I'm not adding `and`/`or` variants. Reviewers: RKSimon, craig.topper, spatel Reviewed By: RKSimon Subscribers: nick, hiraditya, xbolva00, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70051
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp98
1 files changed, 73 insertions, 25 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 047cab5cb5f..9a9ac690aaa 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4943,7 +4943,7 @@ SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
ISD::CondCode Cond,
DAGCombinerInfo &DCI,
const SDLoc &DL) const {
- SmallVector<SDNode *, 2> Built;
+ SmallVector<SDNode *, 4> Built;
if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
DCI, DL, Built)) {
for (SDNode *N : Built)
@@ -4978,26 +4978,40 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
if (!isOperationLegalOrCustom(ISD::MUL, VT))
return SDValue();
- // TODO: Could support comparing with non-zero too.
- ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
- if (!CompTarget || !CompTarget->isNullValue())
- return SDValue();
-
- bool HadOneDivisor = false;
- bool AllDivisorsAreOnes = true;
+ bool HadTautologicalLanes = false;
+ bool AllLanesAreTautological = true;
bool HadEvenDivisor = false;
bool AllDivisorsArePowerOfTwo = true;
- SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
+ bool HadTautologicalInvertedLanes = false;
+ SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
- auto BuildUREMPattern = [&](ConstantSDNode *C) {
+ auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
- if (C->isNullValue())
+ if (CDiv->isNullValue())
return false;
- const APInt &D = C->getAPIntValue();
- // If all divisors are ones, we will prefer to avoid the fold.
- HadOneDivisor |= D.isOneValue();
- AllDivisorsAreOnes &= D.isOneValue();
+ const APInt &D = CDiv->getAPIntValue();
+ const APInt &Cmp = CCmp->getAPIntValue();
+
+ // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
+ // if C2 is not less than C1, the comparison is always false.
+ // But we will only be able to produce the comparison that will give the
+ // opposive tautological answer. So this lane would need to be fixed up.
+ bool TautologicalInvertedLane = D.ule(Cmp);
+ HadTautologicalInvertedLanes |= TautologicalInvertedLane;
+
+ // If we are checking that remainder is something smaller than the divisor,
+ // then this comparison isn't tautological. For now this is not handled,
+ // other than the comparison that remainder is zero.
+ if (!Cmp.isNullValue() && !TautologicalInvertedLane)
+ return false;
+
+ // If all lanes are tautological (either all divisors are ones, or divisor
+ // is not greater than the constant we are comparing with),
+ // we will prefer to avoid the fold.
+ bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
+ HadTautologicalLanes |= TautologicalLane;
+ AllLanesAreTautological &= TautologicalLane;
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
@@ -5025,13 +5039,14 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
- // If the divisor is 1 the result can be constant-folded.
- if (D.isOneValue()) {
+ // If the lane is tautological the result can be constant-folded.
+ if (TautologicalLane) {
// Set P and K amount to a bogus values so we can try to splat them.
P = 0;
K = -1;
- assert(Q.isAllOnesValue() &&
- "Expecting all-ones comparison for one divisor");
+ // And ensure that comparison constant is tautological,
+ // it will always compare true/false.
+ Q = -1;
}
PAmts.push_back(DAG.getConstant(P, DL, SVT));
@@ -5045,11 +5060,11 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue D = REMNode.getOperand(1);
// Collect the values from each element.
- if (!ISD::matchUnaryPredicate(D, BuildUREMPattern))
+ if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
return SDValue();
- // If this is a urem by a one, avoid the fold since it can be constant-folded.
- if (AllDivisorsAreOnes)
+ // If all lanes are tautological, the result can be constant-folded.
+ if (AllLanesAreTautological)
return SDValue();
// If this is a urem by a powers-of-two, avoid the fold since it can be
@@ -5059,7 +5074,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue PVal, KVal, QVal;
if (VT.isVector()) {
- if (HadOneDivisor) {
+ if (HadTautologicalLanes) {
// Try to turn PAmts into a splat, since we don't care about the values
// that are currently '0'. If we can't, just keep '0'`s.
turnVectorIntoSplatVector(PAmts, isNullConstant);
@@ -5096,8 +5111,41 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
}
// UREM: (setule/setugt (rotr (mul N, P), K), Q)
- return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
- ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+ SDValue NewCC =
+ DAG.getSetCC(DL, SETCCVT, Op0, QVal,
+ ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+ if (!HadTautologicalInvertedLanes)
+ return NewCC;
+
+ // If any lanes previously compared always-false, the NewCC will give
+ // always-true result for them, so we need to fixup those lanes.
+ // Or the other way around for inequality predicate.
+ assert(VT.isVector() && "Can/should only get here for vectors.");
+ Created.push_back(NewCC.getNode());
+
+ // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
+ // if C2 is not less than C1, the comparison is always false.
+ // But we have produced the comparison that will give the
+ // opposive tautological answer. So these lanes would need to be fixed up.
+ SDValue TautologicalInvertedChannels =
+ DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
+ Created.push_back(TautologicalInvertedChannels.getNode());
+
+ if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
+ // If we have a vector select, let's replace the comparison results in the
+ // affected lanes with the correct tautological result.
+ SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
+ DL, SETCCVT, SETCCVT);
+ return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
+ Replacement, NewCC);
+ }
+
+ // Else, we can just invert the comparison result in the appropriate lanes.
+ if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
+ return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
+ TautologicalInvertedChannels);
+
+ return SDValue(); // Don't know how to lower.
}
/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
OpenPOWER on IntegriCloud