diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 34 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 14 |
2 files changed, 35 insertions, 13 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d04d455a0e4..ed9e1016e70 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -16537,6 +16537,10 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2); if (Op.getOpcode() == ISD::SHL) { + // Simple i8 add case + if (ShiftAmt == 1) + return DAG.getNode(ISD::ADD, dl, VT, R, R); + // Make a large shift. SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R, ShiftAmt, DAG); @@ -16881,13 +16885,31 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, // the extra overheads to get from v16i8 to v8i32 make the existing SSE // solution better. if (Subtarget->hasInt256() && VT == MVT::v8i16) { - MVT NewVT = VT == MVT::v8i16 ? MVT::v8i32 : MVT::v16i16; + MVT ExtVT = MVT::v8i32; unsigned ExtOpc = Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - R = DAG.getNode(ExtOpc, dl, NewVT, R); - Amt = DAG.getNode(ISD::ANY_EXTEND, dl, NewVT, Amt); + R = DAG.getNode(ExtOpc, dl, ExtVT, R); + Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt); return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(Op.getOpcode(), dl, NewVT, R, Amt)); + DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt)); + } + + if (Subtarget->hasInt256() && VT == MVT::v16i16) { + MVT ExtVT = MVT::v8i32; + SDValue Z = getZeroVector(VT, Subtarget, DAG, dl); + SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z); + SDValue AHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, Amt, Z); + SDValue RLo = DAG.getNode(X86ISD::UNPCKL, dl, VT, R, R); + SDValue RHi = DAG.getNode(X86ISD::UNPCKH, dl, VT, R, R); + ALo = DAG.getNode(ISD::BITCAST, dl, ExtVT, ALo); + AHi = DAG.getNode(ISD::BITCAST, dl, ExtVT, AHi); + RLo = DAG.getNode(ISD::BITCAST, dl, ExtVT, RLo); + RHi = DAG.getNode(ISD::BITCAST, dl, ExtVT, RHi); + SDValue Lo = DAG.getNode(Op.getOpcode(), dl, ExtVT, RLo, ALo); + SDValue Hi = DAG.getNode(Op.getOpcode(), dl, ExtVT, RHi, AHi); + Lo = DAG.getNode(ISD::SRL, dl, ExtVT, Lo, DAG.getConstant(16, dl, ExtVT)); + Hi = DAG.getNode(ISD::SRL, dl, ExtVT, Hi, DAG.getConstant(16, dl, ExtVT)); + return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi); } // Decompose 256-bit shifts into smaller 128-bit shifts. @@ -20859,7 +20881,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, if (!InVec.hasOneUse()) return SDValue(); EVT BCVT = InVec.getOperand(0).getValueType(); - if (!BCVT.isVector() || + if (!BCVT.isVector() || BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements()) return SDValue(); InVec = InVec.getOperand(0); @@ -20991,7 +21013,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, } EVT VT = N->getValueType(0); - + if (VT == MVT::i1 && dyn_cast<ConstantSDNode>(N->getOperand(1)) && InputVector.getOpcode() == ISD::BITCAST && dyn_cast<ConstantSDNode>(InputVector.getOperand(0))) { diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 17c86a7b9f0..bbfeba8b9d8 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -153,15 +153,15 @@ unsigned X86TTIImpl::getArithmeticInstrCost( { ISD::SHL, MVT::v4i64, 1 }, { ISD::SRL, MVT::v4i64, 1 }, - { ISD::SHL, MVT::v32i8, 42 }, // cmpeqb sequence. - { ISD::SHL, MVT::v16i16, 16*10 }, // Scalarized. + { ISD::SHL, MVT::v32i8, 42 }, // cmpeqb sequence. + { ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence. - { ISD::SRL, MVT::v32i8, 32*10 }, // Scalarized. - { ISD::SRL, MVT::v16i16, 8*10 }, // Scalarized. + { ISD::SRL, MVT::v32i8, 32*10 }, // Scalarized. + { ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence. - { ISD::SRA, MVT::v32i8, 32*10 }, // Scalarized. - { ISD::SRA, MVT::v16i16, 16*10 }, // Scalarized. - { ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized. + { ISD::SRA, MVT::v32i8, 32*10 }, // Scalarized. + { ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence. + { ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized. // Vectorizing division is a bad idea. See the SSE2 table for more comments. { ISD::SDIV, MVT::v32i8, 32*20 }, |

