diff options
author | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2018-12-29 13:40:48 +0000 |
---|---|---|
committer | Nemanja Ivanovic <nemanja.i.ibm@gmail.com> | 2018-12-29 13:40:48 +0000 |
commit | 0f7715afe1f15174204ca62f44e53f56b462bf19 (patch) | |
tree | f3c06a107696ab18f4de90b4c3d64b108fea87dd /llvm/lib/Target/PowerPC | |
parent | 763c8973bfa4534b503f31b4d716a523d6b9fd8e (diff) | |
download | bcm5719-llvm-0f7715afe1f15174204ca62f44e53f56b462bf19.tar.gz bcm5719-llvm-0f7715afe1f15174204ca62f44e53f56b462bf19.zip |
[PowerPC] Complete the custom legalization of vector int to fp conversion
A recent patch added custom legalization of the vector conversion
v2i16 -> v2f64. This patch rounds that out for the other cases where the input
vector has an illegal type that is narrower than the result vector's type.
Specifically, this will handle the following conversions:
v2i8 -> v2f64
v4i8 -> v4f32
v4i16 -> v4f32
Differential revision: https://reviews.llvm.org/D54663
llvm-svn: 350155
Diffstat (limited to 'llvm/lib/Target/PowerPC')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 59 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrVSX.td | 79 |
2 files changed, 93 insertions, 45 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index e296c45728c..4442746031d 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -788,8 +788,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); + // Custom handling for partial vectors of integers converted to + // floating point. We already have optimal handling for v2i32 through + // the DAG combine, so those aren't necessary. + setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::FNEG, MVT::v4f32, Legal); setOperationAction(ISD::FNEG, MVT::v2f64, Legal); @@ -7288,43 +7297,49 @@ static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) { return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops); } -SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, - SelectionDAG &DAG, +SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { unsigned Opc = Op.getOpcode(); assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) && "Unexpected conversion type"); - assert(Op.getValueType() == MVT::v2f64 && "Supports v2f64 only."); + assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) && + "Supports conversions to v2f64/v4f32 only."); - // CPU's prior to P9 don't have a way to sign-extend in vectors. 
bool SignedConv = Opc == ISD::SINT_TO_FP; - if (SignedConv && !Subtarget.hasP9Altivec()) - return SDValue(); + bool FourEltRes = Op.getValueType() == MVT::v4f32; SDValue Wide = widenVec(DAG, Op.getOperand(0), dl); EVT WideVT = Wide.getValueType(); unsigned WideNumElts = WideVT.getVectorNumElements(); + MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64; SmallVector<int, 16> ShuffV; for (unsigned i = 0; i < WideNumElts; ++i) ShuffV.push_back(i + WideNumElts); - if (Subtarget.isLittleEndian()) { - ShuffV[0] = 0; - ShuffV[WideNumElts / 2] = 1; - } - else { - ShuffV[WideNumElts / 2 - 1] = 0; - ShuffV[WideNumElts - 1] = 1; - } + int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2; + int SaveElts = FourEltRes ? 4 : 2; + if (Subtarget.isLittleEndian()) + for (int i = 0; i < SaveElts; i++) + ShuffV[i * Stride] = i; + else + for (int i = 1; i <= SaveElts; i++) + ShuffV[i * Stride - 1] = i - 1; - SDValue ShuffleSrc2 = SignedConv ? DAG.getUNDEF(WideVT) : - DAG.getConstant(0, dl, WideVT); + SDValue ShuffleSrc2 = + SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT); SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV); - unsigned ExtendOp = SignedConv ? (unsigned) PPCISD::SExtVElems : - (unsigned) ISD::BITCAST; - SDValue Extend = DAG.getNode(ExtendOp, dl, MVT::v2i64, Arrange); + unsigned ExtendOp = + SignedConv ? 
(unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST; + + SDValue Extend; + if (!Subtarget.hasP9Altivec() && SignedConv) { + Arrange = DAG.getBitcast(IntermediateVT, Arrange); + Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange, + DAG.getValueType(Op.getOperand(0).getValueType())); + } else + Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange); return DAG.getNode(Opc, dl, Op.getValueType(), Extend); } @@ -7333,8 +7348,10 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); - if (Op.getValueType() == MVT::v2f64 && - Op.getOperand(0).getValueType() == MVT::v2i16) + EVT InVT = Op.getOperand(0).getValueType(); + EVT OutVT = Op.getValueType(); + if (OutVT.isVector() && OutVT.isFloatingPoint() && + isOperationCustom(Op.getOpcode(), InVT)) return LowerINT_TO_FPVector(Op, DAG, dl); // Conversions to f128 are legal. diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 8321d7f2ecf..0f073388dc7 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -1217,6 +1217,27 @@ def ScalarLoads { dag Li32 = (i32 (load xoaddr:$src)); } +def DWToSPExtractConv { + dag El0US1 = (f32 (PPCfcfidus + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0)))))); + dag El1US1 = (f32 (PPCfcfidus + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1)))))); + dag El0US2 = (f32 (PPCfcfidus + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0)))))); + dag El1US2 = (f32 (PPCfcfidus + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1)))))); + dag El0SS1 = (f32 (PPCfcfids + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0)))))); + dag El1SS1 = (f32 (PPCfcfids + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1)))))); + dag El0SS2 = (f32 (PPCfcfids + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0)))))); + dag El1SS2 = (f32 (PPCfcfids + (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1)))))); + dag BVU = (v4f32 (build_vector El0US1, El1US1, 
El0US2, El1US2)); + dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2)); +} + // The following VSX instructions were introduced in Power ISA 2.07 /* FIXME: if the operands are v2i64, these patterns will not match. we should define new patterns or otherwise match the same patterns @@ -1452,35 +1473,27 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. } // UseVSXReg = 1 let Predicates = [IsLittleEndian] in { - def : Pat<(f32 (PPCfcfids - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))), - (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; - def : Pat<(f32 (PPCfcfids - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), + def : Pat<DWToSPExtractConv.El0SS1, + (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>; + def : Pat<DWToSPExtractConv.El1SS1, (f32 (XSCVSXDSP (COPY_TO_REGCLASS - (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; - def : Pat<(f32 (PPCfcfidus - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))), - (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; - def : Pat<(f32 (PPCfcfidus - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), + (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>; + def : Pat<DWToSPExtractConv.El0US1, + (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>; + def : Pat<DWToSPExtractConv.El1US1, (f32 (XSCVUXDSP (COPY_TO_REGCLASS - (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>; + (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>; } let Predicates = [IsBigEndian] in { - def : Pat<(f32 (PPCfcfids - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))), - (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>; - def : Pat<(f32 (PPCfcfids - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), - (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; - def : Pat<(f32 (PPCfcfidus - (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))), - (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S, VSFRC)))>; - def : Pat<(f32 (PPCfcfidus - (f64 
(PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), - (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; + def : Pat<DWToSPExtractConv.El0SS1, + (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>; + def : Pat<DWToSPExtractConv.El1SS1, + (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>; + def : Pat<DWToSPExtractConv.El0US1, + (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>; + def : Pat<DWToSPExtractConv.El1US1, + (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>; } // Instructions for converting float to i64 feeding a store. @@ -3814,6 +3827,15 @@ let AddedComplexity = 400 in { (XFLOADf32 xoaddr:$A), VSFRC)), 0))>; } + let Predicates = [IsBigEndian, HasP8Vector] in { + def : Pat<DWToSPExtractConv.BVU, + (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3), + (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3)))>; + def : Pat<DWToSPExtractConv.BVS, + (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3), + (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>; + } + // Big endian, available on all targets with VSX let Predicates = [IsBigEndian, HasVSX] in { def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), @@ -3842,6 +3864,15 @@ let AddedComplexity = 400 in { (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>; } + let Predicates = [IsLittleEndian, HasP8Vector] in { + def : Pat<DWToSPExtractConv.BVU, + (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3), + (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3)))>; + def : Pat<DWToSPExtractConv.BVS, + (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3), + (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>; + } + let Predicates = [IsLittleEndian, HasVSX] in { // Little endian, available on all targets with VSX def : Pat<(v2f64 (build_vector f64:$A, f64:$B)), |