Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 10
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 70
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h   |  5
3 files changed, 66 insertions, 19 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 15b60ba5bf6..3ef7b2c7697 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2131,6 +2131,16 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
   case X86ISD::GlobalBaseReg:
     return getGlobalBaseReg();
 
+  case X86ISD::SHRUNKBLEND: {
+    // SHRUNKBLEND selects like a regular VSELECT.
+    SDValue VSelect = CurDAG->getNode(
+        ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
+        Node->getOperand(1), Node->getOperand(2));
+    ReplaceUses(SDValue(Node, 0), VSelect);
+    SelectCode(VSelect.getNode());
+    // We already called ReplaceUses.
+    return nullptr;
+  }
 
   case ISD::ATOMIC_LOAD_XOR:
   case ISD::ATOMIC_LOAD_AND:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bde948174d7..53d9f913429 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19025,6 +19025,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::ANDNP:              return "X86ISD::ANDNP";
   case X86ISD::PSIGN:              return "X86ISD::PSIGN";
   case X86ISD::BLENDI:             return "X86ISD::BLENDI";
+  case X86ISD::SHRUNKBLEND:        return "X86ISD::SHRUNKBLEND";
   case X86ISD::SUBUS:              return "X86ISD::SUBUS";
   case X86ISD::HADD:               return "X86ISD::HADD";
   case X86ISD::HSUB:               return "X86ISD::HSUB";
@@ -22618,22 +22619,17 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
       // build_vector of constants. This will be taken care of in a later
      // condition.
       (TLI.isOperationLegalOrCustom(ISD::VSELECT, VT) && VT != MVT::v16i16 &&
-       VT != MVT::v8i16)) {
+       VT != MVT::v8i16) &&
+      // Don't optimize vector of constants. Those are handled by
+      // the generic code and all the bits must be properly set for
+      // the generic optimizer.
+      !ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) {
     unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
 
     // Don't optimize vector selects that map to mask-registers.
     if (BitWidth == 1)
       return SDValue();
 
-    // Check all uses of that condition operand to see whether it will be
-    // consumed by non-BLEND instructions, which may depend on all bits being
-    // set properly.
-    for (SDNode::use_iterator I = Cond->use_begin(),
-         E = Cond->use_end(); I != E; ++I)
-      if (I->getOpcode() != ISD::VSELECT)
-        // TODO: Add other opcodes eventually lowered into BLEND.
-        return SDValue();
-
     assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
     APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
@@ -22641,13 +22637,45 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
     TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
                                           DCI.isBeforeLegalizeOps());
     if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) ||
-        (TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne,
-                                  TLO) &&
-         // Don't optimize vector of constants. Those are handled by
-         // the generic code and all the bits must be properly set for
-         // the generic optimizer.
-         !ISD::isBuildVectorOfConstantSDNodes(TLO.New.getNode())))
-      DCI.CommitTargetLoweringOpt(TLO);
+        TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne,
+                                 TLO)) {
+      // If we changed the computation somewhere in the DAG, this change
+      // will affect all users of Cond.
+      // Make sure it is fine and update all the nodes so that we do not
+      // use the generic VSELECT anymore. Otherwise, we may perform
+      // wrong optimizations as we messed up the actual expectation
+      // for the vector boolean values.
+      if (Cond != TLO.Old) {
+        // Check all uses of that condition operand to see whether it will be
+        // consumed by non-BLEND instructions, which may depend on all bits
+        // being set properly.
+        for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end();
+             I != E; ++I)
+          if (I->getOpcode() != ISD::VSELECT)
+            // TODO: Add other opcodes eventually lowered into BLEND.
+            return SDValue();
+
+        // Update all the users of the condition, before committing the change,
+        // so that the VSELECT optimizations that expect the correct vector
+        // boolean value will not be triggered.
+        for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end();
+             I != E; ++I)
+          DAG.ReplaceAllUsesOfValueWith(
+              SDValue(*I, 0),
+              DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(*I), I->getValueType(0),
+                          Cond, I->getOperand(1), I->getOperand(2)));
+        DCI.CommitTargetLoweringOpt(TLO);
+        return SDValue();
+      }
+      // At this point, only Cond is changed. Change the condition
+      // just for N to keep the opportunity to optimize all other
+      // users their own way.
+      DAG.ReplaceAllUsesOfValueWith(
+          SDValue(N, 0),
+          DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(N), N->getValueType(0),
+                      TLO.New, N->getOperand(1), N->getOperand(2)));
+      return SDValue();
+    }
   }
 
   // We should generate an X86ISD::BLENDI from a vselect if its argument
@@ -22661,7 +22689,9 @@
   // Iff we find this pattern and the build_vectors are built from
   // constants, we translate the vselect into a shuffle_vector that we
   // know will be matched by LowerVECTOR_SHUFFLEtoBlend.
-  if (N->getOpcode() == ISD::VSELECT && !DCI.isBeforeLegalize()) {
+  if ((N->getOpcode() == ISD::VSELECT ||
+       N->getOpcode() == X86ISD::SHRUNKBLEND) &&
+      !DCI.isBeforeLegalize()) {
     SDValue Shuffle = TransformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
     if (Shuffle.getNode())
       return Shuffle;
@@ -24854,7 +24884,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::EXTRACT_VECTOR_ELT:
     return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI);
   case ISD::VSELECT:
-  case ISD::SELECT:         return PerformSELECTCombine(N, DAG, DCI, Subtarget);
+  case ISD::SELECT:
+  case X86ISD::SHRUNKBLEND:
+    return PerformSELECTCombine(N, DAG, DCI, Subtarget);
   case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI, Subtarget);
   case ISD::ADD:            return PerformAddCombine(N, DAG, Subtarget);
   case ISD::SUB:            return PerformSubCombine(N, DAG, Subtarget);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 35e132b944a..2737703bd6d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -190,6 +190,11 @@ namespace llvm {
       /// BLENDI - Blend where the selector is an immediate.
      BLENDI,
 
+      /// SHRUNKBLEND - Blend where the condition has been shrunk.
+      /// This is used to emphasize that the condition mask is
+      /// no longer valid for generic VSELECT optimizations.
+      SHRUNKBLEND,
+
       /// ADDSUB - Combined add and sub on an FP vector.
       ADDSUB,
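
Why the shrunk condition is safe for the blend itself but not for generic combines: the SSE4.1 variable blend instructions this lowers to (BLENDVPS, BLENDVPD, PBLENDVB) read only the most significant bit of each mask element, whereas the generic DAG combiner treats a VSELECT condition as a boolean vector whose lanes are all-ones or all-zeros. A minimal scalar sketch of that asymmetry (standalone C++, not LLVM code; blendv_lane is a made-up helper modeling one 32-bit lane):

#include <cstdint>
#include <cstdio>

// Model of one BLENDVPS lane: the hardware consults only the sign bit
// (bit 31) of the mask element; the remaining 31 bits are ignored.
static uint32_t blendv_lane(uint32_t mask, uint32_t a, uint32_t b) {
  return (mask & 0x80000000u) ? b : a;
}

int main() {
  uint32_t a = 0x11111111u, b = 0x22222222u;
  // An all-ones mask and a sign-bit-only mask blend identically, so
  // SimplifyDemandedBits may legally rewrite the condition ...
  std::printf("%08x %08x\n", blendv_lane(0xFFFFFFFFu, a, b),
                             blendv_lane(0x80000000u, a, b));
  // ... but a combine that reads the condition as a boolean vector
  // (all-ones == true) would misinterpret a mask like 0x80000000.
  // Retagging the node X86ISD::SHRUNKBLEND hides it from such combines.
  return 0;
}

Once the bits have been shrunk, only the target-specific opcode records that the condition is a sign-bit mask rather than a full boolean vector, which is why the combine above rewrites every VSELECT user of Cond before committing the change.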

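For reference, the DemandedMask used by the combine, APInt::getHighBitsSet(BitWidth, 1), covers exactly the sign bit of each condition lane. A tiny standalone check (assumes LLVM headers are available; this snippet is illustrative and not part of the patch):

#include "llvm/ADT/APInt.h"
#include <cstdio>

int main() {
  unsigned BitWidth = 32; // scalar width of e.g. a v4i32 condition lane
  llvm::APInt DemandedMask = llvm::APInt::getHighBitsSet(BitWidth, 1);
  // Prints 0x80000000: every bit below the sign bit may be simplified away.
  std::printf("%#llx\n", (unsigned long long)DemandedMask.getZExtValue());
  return 0;
}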
