diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 36 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 7 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 88 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 48 | ||||
-rw-r--r-- | llvm/lib/CodeGen/TargetLoweringBase.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 50 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 14 |
9 files changed, 204 insertions, 49 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9f3ad172f0e..b6be6d1dfc5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -325,6 +325,7 @@ namespace { SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); + SDValue visitFunnelShift(SDNode *N); SDValue visitRotate(SDNode *N); SDValue visitABS(SDNode *N); SDValue visitBSWAP(SDNode *N); @@ -1513,6 +1514,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SRL: return visitSRL(N); case ISD::ROTR: case ISD::ROTL: return visitRotate(N); + case ISD::FSHL: + case ISD::FSHR: return visitFunnelShift(N); case ISD::ABS: return visitABS(N); case ISD::BSWAP: return visitBSWAP(N); case ISD::BITREVERSE: return visitBITREVERSE(N); @@ -6926,6 +6929,39 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFunnelShift(SDNode *N) { + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + bool IsFSHL = N->getOpcode() == ISD::FSHL; + unsigned BitWidth = VT.getScalarSizeInBits(); + + // fold (fshl N0, N1, 0) -> N0 + // fold (fshr N0, N1, 0) -> N1 + if (DAG.MaskedValueIsZero(N2, APInt::getAllOnesValue(BitWidth))) + return IsFSHL ? N0 : N1; + + // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth) + if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) { + if (Cst->getAPIntValue().uge(BitWidth)) { + uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth); + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1, + DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType())); + } + } + + // fold (fshl N0, N0, N2) -> (rotl N0, N2) + // fold (fshr N0, N0, N2) -> (rotr N0, N2) + // TODO: Investigate flipping this rotate if only one is legal, if funnel shift + // is legal as well we might be better off avoiding non-constant (BW - N2). + unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR; + if (N0 == N1 && hasOperation(RotOpc, VT)) + return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2); + + return SDValue(); +} + SDValue DAGCombiner::visitABS(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index dcb479e4ce1..2f4a5451a61 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1170,6 +1170,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } } break; + case ISD::FSHL: + case ISD::FSHR: case ISD::SRL_PARTS: case ISD::SRA_PARTS: case ISD::SHL_PARTS: { @@ -3262,6 +3264,11 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } break; } + case ISD::FSHL: + case ISD::FSHR: + if (TLI.expandFunnelShift(Node, Tmp1, DAG)) + Results.push_back(Tmp1); + break; case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 4f283e9f3b8..4229e577a3d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -129,12 +129,13 @@ class VectorLegalizer { SDValue ExpandFNEG(SDValue Op); SDValue ExpandFSUB(SDValue Op); SDValue ExpandBITREVERSE(SDValue Op); - SDValue ExpandCTPOP(SDValue Op);
- SDValue ExpandCTLZ(SDValue Op);
- SDValue ExpandCTTZ(SDValue Op);
- SDValue ExpandFMINNUM_FMAXNUM(SDValue Op);
- SDValue ExpandStrictFPOp(SDValue Op);
-
+ SDValue ExpandCTPOP(SDValue Op); + SDValue ExpandCTLZ(SDValue Op); + SDValue ExpandCTTZ(SDValue Op); + SDValue ExpandFunnelShift(SDValue Op); + SDValue ExpandFMINNUM_FMAXNUM(SDValue Op); + SDValue ExpandStrictFPOp(SDValue Op); + /// Implements vector promotion. /// /// This is essentially just bitcasting the operands to a different type and @@ -746,12 +747,15 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: return ExpandCTLZ(Op); - case ISD::CTTZ:
- case ISD::CTTZ_ZERO_UNDEF:
- return ExpandCTTZ(Op);
- case ISD::FMINNUM:
- case ISD::FMAXNUM:
- return ExpandFMINNUM_FMAXNUM(Op);
+ case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + return ExpandCTTZ(Op); + case ISD::FSHL: + case ISD::FSHR: + return ExpandFunnelShift(Op); + case ISD::FMINNUM: + case ISD::FMAXNUM: + return ExpandFMINNUM_FMAXNUM(Op); case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: @@ -1123,32 +1127,40 @@ SDValue VectorLegalizer::ExpandFSUB(SDValue Op) { return Op; // Defer to LegalizeDAG return DAG.UnrollVectorOp(Op.getNode()); -}
-
-SDValue VectorLegalizer::ExpandCTPOP(SDValue Op) {
- SDValue Result;
- if (TLI.expandCTPOP(Op.getNode(), Result, DAG))
- return Result;
-
- return DAG.UnrollVectorOp(Op.getNode());
-}
-
-SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
- SDValue Result;
- if (TLI.expandCTLZ(Op.getNode(), Result, DAG))
- return Result;
-
- return DAG.UnrollVectorOp(Op.getNode());
-}
-
-SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) {
- SDValue Result;
- if (TLI.expandCTTZ(Op.getNode(), Result, DAG))
- return Result;
-
- return DAG.UnrollVectorOp(Op.getNode());
-}
-
+} + +SDValue VectorLegalizer::ExpandCTPOP(SDValue Op) { + SDValue Result; + if (TLI.expandCTPOP(Op.getNode(), Result, DAG)) + return Result; + + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) { + SDValue Result; + if (TLI.expandCTLZ(Op.getNode(), Result, DAG)) + return Result; + + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) { + SDValue Result; + if (TLI.expandCTTZ(Op.getNode(), Result, DAG)) + return Result; + + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandFunnelShift(SDValue Op) { + SDValue Result; + if (TLI.expandFunnelShift(Op.getNode(), Result, DAG)) + return Result; + + return DAG.UnrollVectorOp(Op.getNode()); +} + SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) { if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Op.getNode(), DAG)) return Expanded; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index a6a8ebe1a47..d0d748f3841 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5751,6 +5751,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Zero = DAG.getConstant(0, sdl, VT); SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC); + auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR; + if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) { + setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z)); + return nullptr; + } + // When X == Y, this is rotate. If the data type has a power-of-2 size, we // avoid the select that is necessary in the general case to filter out // the 0-shift possibility that leads to UB. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 02d45df5864..bf81d0d267a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -237,6 +237,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SRL: return "srl"; case ISD::ROTL: return "rotl"; case ISD::ROTR: return "rotr"; + case ISD::FSHL: return "fshl"; + case ISD::FSHR: return "fshr"; case ISD::FADD: return "fadd"; case ISD::STRICT_FADD: return "strict_fadd"; case ISD::FSUB: return "fsub"; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 04303d682d3..6654a02e97a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -4114,6 +4114,54 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, return Ok; } +bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + EVT VT = Node->getValueType(0); + + if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) || + !isOperationLegalOrCustom(ISD::SRL, VT) || + !isOperationLegalOrCustom(ISD::SUB, VT) || + !isOperationLegalOrCustomOrPromote(ISD::OR, VT))) + return false; + + // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) + // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) + SDValue X = Node->getOperand(0); + SDValue Y = Node->getOperand(1); + SDValue Z = Node->getOperand(2); + + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + bool IsFSHL = Node->getOpcode() == ISD::FSHL; + SDLoc DL(SDValue(Node, 0)); + + EVT ShVT = Z.getValueType(); + SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT); + SDValue Zero = DAG.getConstant(0, DL, ShVT); + + SDValue ShAmt; + if (isPowerOf2_32(EltSizeInBits)) { + SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT); + ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask); + } else { + ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC); + } + + SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt); + SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt); + SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt); + SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY); + + // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth, + // and that is undefined. We must compare and select to avoid UB. + EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT); + + // For fshl, 0-shift returns the 1st arg (X). + // For fshr, 0-shift returns the 2nd arg (Y). + SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ); + Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? X : Y, Or); + return true; +} + bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const { SDValue Src = Node->getOperand(0); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 44afbef946f..b6eeda4811e 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -610,6 +610,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::UMIN, VT, Expand); setOperationAction(ISD::UMAX, VT, Expand); setOperationAction(ISD::ABS, VT, Expand); + setOperationAction(ISD::FSHL, VT, Expand); + setOperationAction(ISD::FSHR, VT, Expand); setOperationAction(ISD::SADDSAT, VT, Expand); setOperationAction(ISD::UADDSAT, VT, Expand); setOperationAction(ISD::SSUBSAT, VT, Expand); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index da1ebb809c0..79c8f6e5535 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -195,6 +195,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ABS , MVT::i64 , Custom); } + // Funnel shifts. + for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) { + setOperationAction(ShiftOp , MVT::i16 , Custom); + setOperationAction(ShiftOp , MVT::i32 , Custom); + if (Subtarget.is64Bit()) + setOperationAction(ShiftOp , MVT::i64 , Custom); + } + // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this // operation. setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); @@ -16972,6 +16980,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { /// Lower SRA_PARTS and friends, which return two i32 values /// and take a 2 x i32 value to shift plus a shift amount. +/// TODO: Can this be moved to general expansion code? static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); MVT VT = Op.getSimpleValueType(); @@ -16981,8 +16990,8 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) { SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); - // X86ISD::SHLD and X86ISD::SHRD have defined overflow behavior but the - // generic ISD nodes haven't. Insert an AND to be safe, it's optimized away + // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and + // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's optimized away // during isel. SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, DAG.getConstant(VTBits - 1, dl, MVT::i8)); @@ -16992,10 +17001,10 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) { SDValue Tmp2, Tmp3; if (Op.getOpcode() == ISD::SHL_PARTS) { - Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt); + Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt); Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt); } else { - Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt); + Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt); Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt); } @@ -17019,6 +17028,37 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues({ Lo, Hi }, dl); } +static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + assert((Op.getOpcode() == ISD::FSHL || Op.getOpcode() == ISD::FSHR) && + "Unexpected funnel shift opcode!"); + assert((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) && + "Unexpected funnel shift type!"); + + SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Amt = Op.getOperand(2); + + // Expand slow SHLD/SHRD cases. + // TODO - can we be more selective here: OptSize/RMW etc.? + if (Subtarget.isSHLDSlow()) + return SDValue(); + + bool IsFSHR = Op.getOpcode() == ISD::FSHR; + if (IsFSHR) + std::swap(Op0, Op1); + + // i16 needs to modulo the shift amount, but i32/i64 have implicit modulo. + if (VT == MVT::i16) + Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt, + DAG.getConstant(15, DL, Amt.getValueType())); + + unsigned SHDOp = (IsFSHR ? X86ISD::SHRD : X86ISD::SHLD); + return DAG.getNode(SHDOp, DL, VT, Op0, Op1, Amt); +} + // Try to use a packed vector operation to handle i64 on 32-bit targets when // AVX512DQ is enabled. static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG, @@ -26115,6 +26155,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SHL_PARTS: case ISD::SRA_PARTS: case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG); + case ISD::FSHL: + case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 9ead6a614fc..78893221592 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2022,7 +2022,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, static const CostTblEntry X64CostTbl[] = { // 64-bit targets { ISD::ROTL, MVT::i64, 1 }, { ISD::ROTR, MVT::i64, 1 }, - { X86ISD::SHLD, MVT::i64, 4 } + { ISD::FSHL, MVT::i64, 4 } }; static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets { ISD::ROTL, MVT::i32, 1 }, @@ -2031,9 +2031,9 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, { ISD::ROTR, MVT::i32, 1 }, { ISD::ROTR, MVT::i16, 1 }, { ISD::ROTR, MVT::i8, 1 }, - { X86ISD::SHLD, MVT::i32, 4 }, - { X86ISD::SHLD, MVT::i16, 4 }, - { X86ISD::SHLD, MVT::i8, 4 } + { ISD::FSHL, MVT::i32, 4 }, + { ISD::FSHL, MVT::i16, 4 }, + { ISD::FSHL, MVT::i8, 4 } }; unsigned ISD = ISD::DELETED_NODE; @@ -2041,13 +2041,13 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, default: break; case Intrinsic::fshl: - ISD = X86ISD::SHLD; + ISD = ISD::FSHL; if (Args[0] == Args[1]) ISD = ISD::ROTL; break; case Intrinsic::fshr: - // SHRD has same costs so don't duplicate. - ISD = X86ISD::SHLD; + // FSHR has same costs so don't duplicate. + ISD = ISD::FSHL; if (Args[0] == Args[1]) ISD = ISD::ROTR; break; |