Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp         | 50
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.cpp  | 14
2 files changed, 53 insertions, 11 deletions
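
This commit replaces uses of the target-specific X86ISD::SHLD/SHRD nodes with
the generic ISD::FSHL/FSHR funnel-shift nodes. As background, a funnel shift
concatenates two n-bit operands into a 2n-bit value, shifts it, and returns one
n-bit half, with the shift amount defined modulo n. A minimal C++ sketch of the
i32 semantics (illustrative only, not part of the patch):

    #include <cstdint>

    // fshl(Hi, Lo, Amt): concatenate Hi:Lo, shift left, keep the high half.
    uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned Amt) {
      Amt &= 31; // the amount is defined modulo the bit width
      if (Amt == 0)
        return Hi; // avoid the UB of shifting a 32-bit value by 32
      return (Hi << Amt) | (Lo >> (32 - Amt));
    }

    // fshr(Hi, Lo, Amt): concatenate Hi:Lo, shift right, keep the low half.
    uint32_t fshr32(uint32_t Hi, uint32_t Lo, unsigned Amt) {
      Amt &= 31;
      if (Amt == 0)
        return Lo;
      return (Lo >> Amt) | (Hi << (32 - Amt));
    }

These match what the x86 SHLD/SHRD instructions compute for 32/64-bit operands,
which is what makes the straightforward lowering below possible.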
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index da1ebb809c0..79c8f6e5535 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -195,6 +195,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ABS , MVT::i64 , Custom);
}
+ // Funnel shifts.
+ for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
+ setOperationAction(ShiftOp , MVT::i16 , Custom);
+ setOperationAction(ShiftOp , MVT::i32 , Custom);
+ if (Subtarget.is64Bit())
+ setOperationAction(ShiftOp , MVT::i64 , Custom);
+ }
+
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
@@ -16972,6 +16980,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
/// Lower SRA_PARTS and friends, which return two i32 values
/// and take a 2 x i32 value to shift plus a shift amount.
+/// TODO: Can this be moved to general expansion code?
static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
MVT VT = Op.getSimpleValueType();
@@ -16981,8 +16990,8 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- // X86ISD::SHLD and X86ISD::SHRD have defined overflow behavior but the
- // generic ISD nodes haven't. Insert an AND to be safe, it's optimized away
+ // ISD::FSHL and ISD::FSHR have defined overflow behavior, but the plain
+ // ISD::SHL/SRA/SRL nodes don't. Insert an AND to be safe; it's optimized away
// during isel.
SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
DAG.getConstant(VTBits - 1, dl, MVT::i8));
@@ -16992,10 +17001,10 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
SDValue Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
- Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
+ Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
} else {
- Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
+ Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
}
@@ -17019,6 +17028,37 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues({ Lo, Hi }, dl);
}
+static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ assert((Op.getOpcode() == ISD::FSHL || Op.getOpcode() == ISD::FSHR) &&
+ "Unexpected funnel shift opcode!");
+ assert((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
+ "Unexpected funnel shift type!");
+
+ SDLoc DL(Op);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+
+ // Expand slow SHLD/SHRD cases.
+ // TODO - can we be more selective here: OptSize/RMW etc.?
+ if (Subtarget.isSHLDSlow())
+ return SDValue();
+
+ bool IsFSHR = Op.getOpcode() == ISD::FSHR;
+ if (IsFSHR)
+ std::swap(Op0, Op1);
+
+ // i16 must mask the shift amount modulo 16; i32/i64 SHLD/SHRD mask implicitly.
+ if (VT == MVT::i16)
+ Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt,
+ DAG.getConstant(15, DL, Amt.getValueType()));
+
+ unsigned SHDOp = (IsFSHR ? X86ISD::SHRD : X86ISD::SHLD);
+ return DAG.getNode(SHDOp, DL, VT, Op0, Op1, Amt);
+}
+
// Try to use a packed vector operation to handle i64 on 32-bit targets when
// AVX512DQ is enabled.
static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
@@ -26115,6 +26155,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SHL_PARTS:
case ISD::SRA_PARTS:
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
+ case ISD::FSHL:
+ case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
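
A note on the i16 masking in LowerFunnelShift above: for 32/64-bit operands the
hardware SHLD/SHRD instructions mask the shift count modulo the operand width,
so they match ISD::FSHL/FSHR's defined modulo behavior for free. The 16-bit
forms instead mask the count modulo 32, so larger amounts would misbehave; the
lowering therefore reduces the amount modulo 16 before emitting the X86ISD
node. A hedged sketch of the required behavior (standalone C++, names are
illustrative, not LLVM code):

    #include <cstdint>
    #include <cstdio>

    // ISD::FSHL on i16 takes its shift amount modulo 16.
    static uint16_t fshl16(uint16_t Hi, uint16_t Lo, unsigned Amt) {
      Amt &= 15; // the AND with 15 that the lowering inserts for MVT::i16
      if (Amt == 0)
        return Hi;
      return (uint16_t)((Hi << Amt) | (Lo >> (16 - Amt)));
    }

    int main() {
      // An amount of 20 must behave like 4; a raw 16-bit SHLD masks its
      // count modulo 32 instead, so it cannot be used without the mask.
      printf("0x%04x\n", fshl16(0x00FF, 0xFF00, 20)); // prints 0x0fff
    }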
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 9ead6a614fc..78893221592 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2022,7 +2022,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
static const CostTblEntry X64CostTbl[] = { // 64-bit targets
{ ISD::ROTL, MVT::i64, 1 },
{ ISD::ROTR, MVT::i64, 1 },
- { X86ISD::SHLD, MVT::i64, 4 }
+ { ISD::FSHL, MVT::i64, 4 }
};
static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
{ ISD::ROTL, MVT::i32, 1 },
@@ -2031,9 +2031,9 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::ROTR, MVT::i32, 1 },
{ ISD::ROTR, MVT::i16, 1 },
{ ISD::ROTR, MVT::i8, 1 },
- { X86ISD::SHLD, MVT::i32, 4 },
- { X86ISD::SHLD, MVT::i16, 4 },
- { X86ISD::SHLD, MVT::i8, 4 }
+ { ISD::FSHL, MVT::i32, 4 },
+ { ISD::FSHL, MVT::i16, 4 },
+ { ISD::FSHL, MVT::i8, 4 }
};
unsigned ISD = ISD::DELETED_NODE;
@@ -2041,13 +2041,13 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
default:
break;
case Intrinsic::fshl:
- ISD = X86ISD::SHLD;
+ ISD = ISD::FSHL;
if (Args[0] == Args[1])
ISD = ISD::ROTL;
break;
case Intrinsic::fshr:
- // SHRD has same costs so don't duplicate.
- ISD = X86ISD::SHLD;
+ // FSHR has the same costs, so don't duplicate.
+ ISD = ISD::FSHL;
if (Args[0] == Args[1])
ISD = ISD::ROTR;
break;
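
On the cost-model side, the key observation is that a funnel shift whose two
value operands are the same is exactly a rotate, which is why the code above
switches ISD to ROTL/ROTR (cost 1) when Args[0] == Args[1] and otherwise prices
the operation as a double shift (cost 4). A quick standalone check of that
identity (illustrative C++, not LLVM code):

    #include <cassert>
    #include <cstdint>

    static uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned Amt) {
      Amt &= 31;
      return Amt ? (Hi << Amt) | (Lo >> (32 - Amt)) : Hi;
    }

    static uint32_t rotl32(uint32_t X, unsigned Amt) {
      Amt &= 31;
      return Amt ? (X << Amt) | (X >> (32 - Amt)) : X;
    }

    int main() {
      // fshl(x, x, c) == rotl(x, c) for every shift amount.
      for (unsigned Amt = 0; Amt < 64; ++Amt)
        assert(fshl32(0xDEADBEEF, 0xDEADBEEF, Amt) == rotl32(0xDEADBEEF, Amt));
      return 0;
    }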