path: root/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp  173
1 file changed, 123 insertions(+), 50 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 3d9c08046e5..4dd2932f56b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -227,6 +227,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::FCANONICALIZE);
@@ -2899,23 +2900,62 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset);
}
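+// Returns true if applying Opc to a value and the 32-bit constant Val folds
+// to a trivial result: (and x, 0) -> 0, (and x, -1) -> x, (or x, -1) -> -1,
+// (or x, 0) -> x, (xor x, 0) -> x. Note that xor with -1 is a NOT, which
+// still costs an instruction, so it is not considered reducible.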
+static bool bitOpWithConstantIsReducible(unsigned Opc, uint32_t Val) {
+ return (Opc == ISD::AND && (Val == 0 || Val == 0xffffffff)) ||
+ (Opc == ISD::OR && (Val == 0xffffffff || Val == 0)) ||
+ (Opc == ISD::XOR && Val == 0);
+}
+
+// Break up a 64-bit bitwise operation with a constant into two 32-bit
+// and/or/xor operations. This will typically happen anyway for a VALU 64-bit
+// and, and it exposes further 32-bit integer combine opportunities, since
+// most 64-bit operations are decomposed this way. TODO: We won't want this
+// for SALU, especially if the constant is an inline immediate.
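+//
+// For example, in (and i64:x, 0xffffffff) the high half of the constant is 0,
+// so the high 32 bits fold to 0, and the low half of the constant is -1, so
+// the low 32 bits fold to lo_32(x); no bitwise instruction remains at all.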
+SDValue SITargetLowering::splitBinaryBitConstantOp(
+ DAGCombinerInfo &DCI,
+ const SDLoc &SL,
+ unsigned Opc, SDValue LHS,
+ const ConstantSDNode *CRHS) const {
+ uint64_t Val = CRHS->getZExtValue();
+ uint32_t ValLo = Lo_32(Val);
+ uint32_t ValHi = Hi_32(Val);
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+
+ if ((bitOpWithConstantIsReducible(Opc, ValLo) ||
+ bitOpWithConstantIsReducible(Opc, ValHi)) ||
+ (CRHS->hasOneUse() && !TII->isInlineConstant(CRHS->getAPIntValue()))) {
+ // If we need to materialize a 64-bit immediate, it will be split up later
+ // anyway. Avoid creating the harder-to-read 64-bit immediate
+ // materialization.
+ return splitBinaryBitConstantOpImpl(DCI, SL, Opc, LHS, ValLo, ValHi);
+ }
+
+ return SDValue();
+}
+
SDValue SITargetLowering::performAndCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (DCI.isBeforeLegalize())
return SDValue();
- if (SDValue Base = AMDGPUTargetLowering::performAndCombine(N, DCI))
- return Base;
-
SelectionDAG &DAG = DCI.DAG;
-
- // (and (fcmp ord x, x), (fcmp une (fabs x), inf)) ->
- // fp_class x, ~(s_nan | q_nan | n_infinity | p_infinity)
+ EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- if (LHS.getOpcode() == ISD::SETCC &&
- RHS.getOpcode() == ISD::SETCC) {
+
+ if (VT == MVT::i64) {
+ const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
+ if (CRHS) {
+ if (SDValue Split
+ = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::AND, LHS, CRHS))
+ return Split;
+ }
+ }
+
+ // (and (fcmp ord x, x), (fcmp une (fabs x), inf)) ->
+ // fp_class x, ~(s_nan | q_nan | n_infinity | p_infinity)
+ if (LHS.getOpcode() == ISD::SETCC && RHS.getOpcode() == ISD::SETCC) {
ISD::CondCode LCC = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
ISD::CondCode RCC = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
@@ -2963,54 +3003,85 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
SDValue RHS = N->getOperand(1);
EVT VT = N->getValueType(0);
- if (VT == MVT::i64) {
- // TODO: This could be a generic combine with a predicate for extracting the
- // high half of an integer being free.
-
- // (or i64:x, (zero_extend i32:y)) ->
- // i64 (bitcast (v2i32 build_vector (or i32:y, lo_32(x)), hi_32(x)))
- if (LHS.getOpcode() == ISD::ZERO_EXTEND &&
- RHS.getOpcode() != ISD::ZERO_EXTEND)
- std::swap(LHS, RHS);
-
- if (RHS.getOpcode() == ISD::ZERO_EXTEND) {
- SDValue ExtSrc = RHS.getOperand(0);
- EVT SrcVT = ExtSrc.getValueType();
- if (SrcVT == MVT::i32) {
- SDLoc SL(N);
- SDValue LowLHS, HiBits;
- std::tie(LowLHS, HiBits) = split64BitValue(LHS, DAG);
- SDValue LowOr = DAG.getNode(ISD::OR, SL, MVT::i32, LowLHS, ExtSrc);
-
- DCI.AddToWorklist(LowOr.getNode());
- DCI.AddToWorklist(HiBits.getNode());
-
- SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
- LowOr, HiBits);
- return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
- }
+ if (VT == MVT::i1) {
+ // or (fp_class x, c1), (fp_class x, c2) -> fp_class x, (c1 | c2)
+ if (LHS.getOpcode() == AMDGPUISD::FP_CLASS &&
+ RHS.getOpcode() == AMDGPUISD::FP_CLASS) {
+ SDValue Src = LHS.getOperand(0);
+ if (Src != RHS.getOperand(0))
+ return SDValue();
+
+ const ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
+ const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
+ if (!CLHS || !CRHS)
+ return SDValue();
+
+ // Only 10 bits are used.
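+ // The mask bits test s_nan, q_nan, -inf, -normal, -denormal, -0, +0,
+ // +denormal, +normal and +inf, one bit each.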
+ static const uint32_t MaxMask = 0x3ff;
+
+ uint32_t NewMask = (CLHS->getZExtValue() | CRHS->getZExtValue()) & MaxMask;
+ SDLoc DL(N);
+ return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
+ Src, DAG.getConstant(NewMask, DL, MVT::i32));
}
+
+ return SDValue();
}
- // or (fp_class x, c1), (fp_class x, c2) -> fp_class x, (c1 | c2)
- if (LHS.getOpcode() == AMDGPUISD::FP_CLASS &&
- RHS.getOpcode() == AMDGPUISD::FP_CLASS) {
- SDValue Src = LHS.getOperand(0);
- if (Src != RHS.getOperand(0))
- return SDValue();
+ if (VT != MVT::i64)
+ return SDValue();
- const ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
- const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
- if (!CLHS || !CRHS)
- return SDValue();
+ // TODO: This could be a generic combine with a predicate for extracting the
+ // high half of an integer being free.
+
+ // (or i64:x, (zero_extend i32:y)) ->
+ // i64 (bitcast (v2i32 build_vector (or i32:y, lo_32(x)), hi_32(x)))
+ if (LHS.getOpcode() == ISD::ZERO_EXTEND &&
+ RHS.getOpcode() != ISD::ZERO_EXTEND)
+ std::swap(LHS, RHS);
+
+ if (RHS.getOpcode() == ISD::ZERO_EXTEND) {
+ SDValue ExtSrc = RHS.getOperand(0);
+ EVT SrcVT = ExtSrc.getValueType();
+ if (SrcVT == MVT::i32) {
+ SDLoc SL(N);
+ SDValue LowLHS, HiBits;
+ std::tie(LowLHS, HiBits) = split64BitValue(LHS, DAG);
+ SDValue LowOr = DAG.getNode(ISD::OR, SL, MVT::i32, LowLHS, ExtSrc);
+
+ DCI.AddToWorklist(LowOr.getNode());
+ DCI.AddToWorklist(HiBits.getNode());
+
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
+ LowOr, HiBits);
+ return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
+ }
+ }
- // Only 10 bits are used.
- static const uint32_t MaxMask = 0x3ff;
+ const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (CRHS) {
+ if (SDValue Split
+ = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::OR, LHS, CRHS))
+ return Split;
+ }
+
+ return SDValue();
+}
+
+SDValue SITargetLowering::performXorCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64)
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
- uint32_t NewMask = (CLHS->getZExtValue() | CRHS->getZExtValue()) & MaxMask;
- SDLoc DL(N);
- return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
- Src, DAG.getConstant(NewMask, DL, MVT::i32));
+ const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
+ if (CRHS) {
+ if (SDValue Split
+ = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::XOR, LHS, CRHS))
+ return Split;
}
return SDValue();
@@ -3427,6 +3498,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performAndCombine(N, DCI);
case ISD::OR:
return performOrCombine(N, DCI);
+ case ISD::XOR:
+ return performXorCombine(N, DCI);
case AMDGPUISD::FP_CLASS:
return performClassCombine(N, DCI);
case ISD::FCANONICALIZE:
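For reference, a minimal standalone sketch of the identity the new combine
relies on (plain C++ with illustrative names, independent of the SelectionDAG
machinery in the patch): a 64-bit and/or/xor against a constant is equivalent
to two independent 32-bit operations on the low and high halves of the value,
reassembled afterwards.

#include <cassert>
#include <cstdint>

// 64-bit AND with a constant computed as two independent 32-bit ANDs on the
// low and high halves, then reassembled -- the identity that
// splitBinaryBitConstantOp exploits (OR and XOR work the same way).
static uint64_t splitAnd64(uint64_t X, uint64_t C) {
  uint32_t Lo = uint32_t(X) & uint32_t(C);             // op on the low half
  uint32_t Hi = uint32_t(X >> 32) & uint32_t(C >> 32); // op on the high half
  return (uint64_t(Hi) << 32) | Lo; // build_vector + bitcast in the DAG
}

int main() {
  uint64_t X = 0x123456789abcdef0ULL;
  // Hi_32(C) == 0 and Lo_32(C) == -1: both halves are "reducible", so the
  // 64-bit and folds away entirely (high half -> 0, low half -> lo_32(x)).
  assert(splitAnd64(X, 0xffffffffULL) == (X & 0xffffffffULL));
  // A constant that is not an inline immediate still splits into two 32-bit
  // constants instead of one 64-bit immediate materialization.
  assert(splitAnd64(X, 0xff00ff00ff00ff00ULL) == (X & 0xff00ff00ff00ff00ULL));
  return 0;
}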