diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 173 |
1 files changed, 123 insertions, 50 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 3d9c08046e5..4dd2932f56b 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -227,6 +227,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::XOR); setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::FCANONICALIZE); @@ -2899,23 +2900,62 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N, return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset); } +static bool bitOpWithConstantIsReducible(unsigned Opc, uint32_t Val) { + return (Opc == ISD::AND && (Val == 0 || Val == 0xffffffff)) || + (Opc == ISD::OR && (Val == 0xffffffff || Val == 0)) || + (Opc == ISD::XOR && Val == 0); +} + +// Break up 64-bit bit operation of a constant into two 32-bit and/or/xor. This +// will typically happen anyway for a VALU 64-bit and. This exposes other 32-bit +// integer combine opportunities since most 64-bit operations are decomposed +// this way. TODO: We won't want this for SALU especially if it is an inline +// immediate. +SDValue SITargetLowering::splitBinaryBitConstantOp( + DAGCombinerInfo &DCI, + const SDLoc &SL, + unsigned Opc, SDValue LHS, + const ConstantSDNode *CRHS) const { + uint64_t Val = CRHS->getZExtValue(); + uint32_t ValLo = Lo_32(Val); + uint32_t ValHi = Hi_32(Val); + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); + + if ((bitOpWithConstantIsReducible(Opc, ValLo) || + bitOpWithConstantIsReducible(Opc, ValHi)) || + (CRHS->hasOneUse() && !TII->isInlineConstant(CRHS->getAPIntValue()))) { + // If we need to materialize a 64-bit immediate, it will be split up later + // anyway. Avoid creating the harder to understand 64-bit immediate + // materialization. + return splitBinaryBitConstantOpImpl(DCI, SL, Opc, LHS, ValLo, ValHi); + } + + return SDValue(); +} + SDValue SITargetLowering::performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const { if (DCI.isBeforeLegalize()) return SDValue(); - if (SDValue Base = AMDGPUTargetLowering::performAndCombine(N, DCI)) - return Base; - SelectionDAG &DAG = DCI.DAG; - - // (and (fcmp ord x, x), (fcmp une (fabs x), inf)) -> - // fp_class x, ~(s_nan | q_nan | n_infinity | p_infinity) + EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - if (LHS.getOpcode() == ISD::SETCC && - RHS.getOpcode() == ISD::SETCC) { + + if (VT == MVT::i64) { + const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS); + if (CRHS) { + if (SDValue Split + = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::AND, LHS, CRHS)) + return Split; + } + } + + // (and (fcmp ord x, x), (fcmp une (fabs x), inf)) -> + // fp_class x, ~(s_nan | q_nan | n_infinity | p_infinity) + if (LHS.getOpcode() == ISD::SETCC && RHS.getOpcode() == ISD::SETCC) { ISD::CondCode LCC = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); ISD::CondCode RCC = cast<CondCodeSDNode>(RHS.getOperand(2))->get(); @@ -2963,54 +3003,85 @@ SDValue SITargetLowering::performOrCombine(SDNode *N, SDValue RHS = N->getOperand(1); EVT VT = N->getValueType(0); - if (VT == MVT::i64) { - // TODO: This could be a generic combine with a predicate for extracting the - // high half of an integer being free. - - // (or i64:x, (zero_extend i32:y)) -> - // i64 (bitcast (v2i32 build_vector (or i32:y, lo_32(x)), hi_32(x))) - if (LHS.getOpcode() == ISD::ZERO_EXTEND && - RHS.getOpcode() != ISD::ZERO_EXTEND) - std::swap(LHS, RHS); - - if (RHS.getOpcode() == ISD::ZERO_EXTEND) { - SDValue ExtSrc = RHS.getOperand(0); - EVT SrcVT = ExtSrc.getValueType(); - if (SrcVT == MVT::i32) { - SDLoc SL(N); - SDValue LowLHS, HiBits; - std::tie(LowLHS, HiBits) = split64BitValue(LHS, DAG); - SDValue LowOr = DAG.getNode(ISD::OR, SL, MVT::i32, LowLHS, ExtSrc); - - DCI.AddToWorklist(LowOr.getNode()); - DCI.AddToWorklist(HiBits.getNode()); - - SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, - LowOr, HiBits); - return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec); - } + if (VT == MVT::i1) { + // or (fp_class x, c1), (fp_class x, c2) -> fp_class x, (c1 | c2) + if (LHS.getOpcode() == AMDGPUISD::FP_CLASS && + RHS.getOpcode() == AMDGPUISD::FP_CLASS) { + SDValue Src = LHS.getOperand(0); + if (Src != RHS.getOperand(0)) + return SDValue(); + + const ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(LHS.getOperand(1)); + const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS.getOperand(1)); + if (!CLHS || !CRHS) + return SDValue(); + + // Only 10 bits are used. + static const uint32_t MaxMask = 0x3ff; + + uint32_t NewMask = (CLHS->getZExtValue() | CRHS->getZExtValue()) & MaxMask; + SDLoc DL(N); + return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1, + Src, DAG.getConstant(NewMask, DL, MVT::i32)); } + + return SDValue(); } - // or (fp_class x, c1), (fp_class x, c2) -> fp_class x, (c1 | c2) - if (LHS.getOpcode() == AMDGPUISD::FP_CLASS && - RHS.getOpcode() == AMDGPUISD::FP_CLASS) { - SDValue Src = LHS.getOperand(0); - if (Src != RHS.getOperand(0)) - return SDValue(); + if (VT != MVT::i64) + return SDValue(); - const ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(LHS.getOperand(1)); - const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS.getOperand(1)); - if (!CLHS || !CRHS) - return SDValue(); + // TODO: This could be a generic combine with a predicate for extracting the + // high half of an integer being free. + + // (or i64:x, (zero_extend i32:y)) -> + // i64 (bitcast (v2i32 build_vector (or i32:y, lo_32(x)), hi_32(x))) + if (LHS.getOpcode() == ISD::ZERO_EXTEND && + RHS.getOpcode() != ISD::ZERO_EXTEND) + std::swap(LHS, RHS); + + if (RHS.getOpcode() == ISD::ZERO_EXTEND) { + SDValue ExtSrc = RHS.getOperand(0); + EVT SrcVT = ExtSrc.getValueType(); + if (SrcVT == MVT::i32) { + SDLoc SL(N); + SDValue LowLHS, HiBits; + std::tie(LowLHS, HiBits) = split64BitValue(LHS, DAG); + SDValue LowOr = DAG.getNode(ISD::OR, SL, MVT::i32, LowLHS, ExtSrc); + + DCI.AddToWorklist(LowOr.getNode()); + DCI.AddToWorklist(HiBits.getNode()); + + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, + LowOr, HiBits); + return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec); + } + } - // Only 10 bits are used. - static const uint32_t MaxMask = 0x3ff; + const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (CRHS) { + if (SDValue Split + = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::OR, LHS, CRHS)) + return Split; + } + + return SDValue(); +} + +SDValue SITargetLowering::performXorCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + EVT VT = N->getValueType(0); + if (VT != MVT::i64) + return SDValue(); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); - uint32_t NewMask = (CLHS->getZExtValue() | CRHS->getZExtValue()) & MaxMask; - SDLoc DL(N); - return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1, - Src, DAG.getConstant(NewMask, DL, MVT::i32)); + const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS); + if (CRHS) { + if (SDValue Split + = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::XOR, LHS, CRHS)) + return Split; } return SDValue(); @@ -3427,6 +3498,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, return performAndCombine(N, DCI); case ISD::OR: return performOrCombine(N, DCI); + case ISD::XOR: + return performXorCombine(N, DCI); case AMDGPUISD::FP_CLASS: return performClassCombine(N, DCI); case ISD::FCANONICALIZE: |

