summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp114
1 files changed, 114 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d02dc761db0..db68237eecb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4178,6 +4178,10 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget.hasLZCNT();
}
+bool X86TargetLowering::isCtlzFast() const {
+ return Subtarget.hasFastLZCNT();
+}
+
bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
if (!Subtarget.hasBMI())
return false;
@@ -29090,6 +29094,113 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
return DAG.getBitcast(VT, Mask);
}
+// Helper function for combineOrCmpEqZeroToCtlzSrl
+// Transforms:
+// seteq(cmp x, 0)
+// into:
+// srl(ctlz x), log2(bitsize(x))
+// Input pattern is checked by caller.
+SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy, SelectionDAG &DAG) {
+ SDValue Cmp = Op.getOperand(1);
+ EVT VT = Cmp.getOperand(0).getValueType();
+ unsigned Log2b = Log2_32(VT.getSizeInBits());
+ SDLoc dl(Op);
+ SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Cmp->getOperand(0));
+ // The result of the shift is true or false, and on X86, the 32-bit
+ // encoding of shr and lzcnt is more desirable.
+ SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32);
+ SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc,
+ DAG.getConstant(Log2b, dl, VT));
+ return DAG.getZExtOrTrunc(Scc, dl, ExtTy);
+}
+
+// Try to transform:
+// zext(or(setcc(eq, (cmp x, 0)), setcc(eq, (cmp y, 0))))
+// into:
+// srl(or(ctlz(x), ctlz(y)), log2(bitsize(x))
+// Will also attempt to match more generic cases, eg:
+// zext(or(or(setcc(eq, cmp 0), setcc(eq, cmp 0)), setcc(eq, cmp 0)))
+// Only applies if the target supports the FastLZCNT feature.
+static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ if (DCI.isBeforeLegalize() || !Subtarget.getTargetLowering()->isCtlzFast())
+ return SDValue();
+
+ auto isORCandidate = [](SDValue N) {
+ return (N->getOpcode() == ISD::OR && N->hasOneUse());
+ };
+
+ // Check the zero extend is extending to 32-bit or more. The code generated by
+ // srl(ctlz) for 16-bit or less variants of the pattern would require extra
+ // instructions to clear the upper bits.
+ if (!N->hasOneUse() || !N->getSimpleValueType(0).bitsGE(MVT::i32) ||
+ !isORCandidate(N->getOperand(0)))
+ return SDValue();
+
+ // Check the node matches: setcc(eq, cmp 0)
+ auto isSetCCCandidate = [](SDValue N) {
+ return N->getOpcode() == X86ISD::SETCC && N->hasOneUse() &&
+ X86::CondCode(N->getConstantOperandVal(0)) == X86::COND_E &&
+ N->getOperand(1).getOpcode() == X86ISD::CMP &&
+ N->getOperand(1).getConstantOperandVal(1) == 0 &&
+ N->getOperand(1).getValueType().bitsGE(MVT::i32);
+ };
+
+ SDNode *OR = N->getOperand(0).getNode();
+ SDValue LHS = OR->getOperand(0);
+ SDValue RHS = OR->getOperand(1);
+
+ // Save nodes matching or(or, setcc(eq, cmp 0)).
+ SmallVector<SDNode *, 2> ORNodes;
+ while (((isORCandidate(LHS) && isSetCCCandidate(RHS)) ||
+ (isORCandidate(RHS) && isSetCCCandidate(LHS)))) {
+ ORNodes.push_back(OR);
+ OR = (LHS->getOpcode() == ISD::OR) ? LHS.getNode() : RHS.getNode();
+ LHS = OR->getOperand(0);
+ RHS = OR->getOperand(1);
+ }
+
+ // The last OR node should match or(setcc(eq, cmp 0), setcc(eq, cmp 0)).
+ if (!(isSetCCCandidate(LHS) && isSetCCCandidate(RHS)) ||
+ !isORCandidate(SDValue(OR, 0)))
+ return SDValue();
+
+ // We have a or(setcc(eq, cmp 0), setcc(eq, cmp 0)) pattern, try to lower it
+ // to
+ // or(srl(ctlz),srl(ctlz)).
+ // The dag combiner can then fold it into:
+ // srl(or(ctlz, ctlz)).
+ EVT VT = OR->getValueType(0);
+ SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG);
+ SDValue Ret, NewRHS;
+ if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG)))
+ Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS);
+
+ if (!Ret)
+ return SDValue();
+
+ // Try to lower nodes matching the or(or, setcc(eq, cmp 0)) pattern.
+ while (ORNodes.size() > 0) {
+ OR = ORNodes.pop_back_val();
+ LHS = OR->getOperand(0);
+ RHS = OR->getOperand(1);
+ // Swap rhs with lhs to match or(setcc(eq, cmp, 0), or).
+ if (RHS->getOpcode() == ISD::OR)
+ std::swap(LHS, RHS);
+ EVT VT = OR->getValueType(0);
+ SDValue NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG);
+ if (!NewRHS)
+ return SDValue();
+ Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS);
+ }
+
+ if (Ret)
+ Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
+
+ return Ret;
+}
+
static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -31121,6 +31232,9 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
return NewAdd;
+ if (SDValue R = combineOrCmpEqZeroToCtlzSrl(N, DAG, DCI, Subtarget))
+ return R;
+
return SDValue();
}
OpenPOWER on IntegriCloud