summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2018-12-29 13:40:48 +0000
committer Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2018-12-29 13:40:48 +0000
commit 0f7715afe1f15174204ca62f44e53f56b462bf19 (patch)
tree f3c06a107696ab18f4de90b4c3d64b108fea87dd /llvm/lib
parent 763c8973bfa4534b503f31b4d716a523d6b9fd8e (diff)
download bcm5719-llvm-0f7715afe1f15174204ca62f44e53f56b462bf19.tar.gz
bcm5719-llvm-0f7715afe1f15174204ca62f44e53f56b462bf19.zip
[PowerPC] Complete the custom legalization of vector int to fp conversion
A recent patch added custom legalization of the vector conversion v2i16 -> v2f64. This patch rounds that out for the other cases in which the input vector has an illegal type that is narrower than the result vector's type. Specifically, it handles the following conversions:
- v2i8 -> v2f64
- v4i8 -> v4f32
- v4i16 -> v4f32

Differential revision: https://reviews.llvm.org/D54663
llvm-svn: 350155
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/PowerPC/PPCISelLowering.cpp 59
-rw-r--r-- llvm/lib/Target/PowerPC/PPCInstrVSX.td 79
2 files changed, 93 insertions, 45 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e296c45728c..4442746031d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -788,8 +788,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+ // Custom handling for partial vectors of integers converted to
+ // floating point. We already have optimal handling for v2i32 through
+ // the DAG combine, so those aren't necessary.
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
@@ -7288,43 +7297,49 @@ static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
}
-SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op,
- SelectionDAG &DAG,
+SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const {
unsigned Opc = Op.getOpcode();
assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
"Unexpected conversion type");
- assert(Op.getValueType() == MVT::v2f64 && "Supports v2f64 only.");
+ assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
+ "Supports conversions to v2f64/v4f32 only.");
- // CPU's prior to P9 don't have a way to sign-extend in vectors.
bool SignedConv = Opc == ISD::SINT_TO_FP;
- if (SignedConv && !Subtarget.hasP9Altivec())
- return SDValue();
+ bool FourEltRes = Op.getValueType() == MVT::v4f32;
SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
EVT WideVT = Wide.getValueType();
unsigned WideNumElts = WideVT.getVectorNumElements();
+ MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
SmallVector<int, 16> ShuffV;
for (unsigned i = 0; i < WideNumElts; ++i)
ShuffV.push_back(i + WideNumElts);
- if (Subtarget.isLittleEndian()) {
- ShuffV[0] = 0;
- ShuffV[WideNumElts / 2] = 1;
- }
- else {
- ShuffV[WideNumElts / 2 - 1] = 0;
- ShuffV[WideNumElts - 1] = 1;
- }
+ int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
+ int SaveElts = FourEltRes ? 4 : 2;
+ if (Subtarget.isLittleEndian())
+ for (int i = 0; i < SaveElts; i++)
+ ShuffV[i * Stride] = i;
+ else
+ for (int i = 1; i <= SaveElts; i++)
+ ShuffV[i * Stride - 1] = i - 1;
- SDValue ShuffleSrc2 = SignedConv ? DAG.getUNDEF(WideVT) :
- DAG.getConstant(0, dl, WideVT);
+ SDValue ShuffleSrc2 =
+ SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
- unsigned ExtendOp = SignedConv ? (unsigned) PPCISD::SExtVElems :
- (unsigned) ISD::BITCAST;
- SDValue Extend = DAG.getNode(ExtendOp, dl, MVT::v2i64, Arrange);
+ unsigned ExtendOp =
+ SignedConv ? (unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST;
+
+ SDValue Extend;
+ if (!Subtarget.hasP9Altivec() && SignedConv) {
+ Arrange = DAG.getBitcast(IntermediateVT, Arrange);
+ Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
+ DAG.getValueType(Op.getOperand(0).getValueType()));
+ } else
+ Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange);
return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}
@@ -7333,8 +7348,10 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
- if (Op.getValueType() == MVT::v2f64 &&
- Op.getOperand(0).getValueType() == MVT::v2i16)
+ EVT InVT = Op.getOperand(0).getValueType();
+ EVT OutVT = Op.getValueType();
+ if (OutVT.isVector() && OutVT.isFloatingPoint() &&
+ isOperationCustom(Op.getOpcode(), InVT))
return LowerINT_TO_FPVector(Op, DAG, dl);
// Conversions to f128 are legal.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 8321d7f2ecf..0f073388dc7 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1217,6 +1217,27 @@ def ScalarLoads {
dag Li32 = (i32 (load xoaddr:$src));
}
+def DWToSPExtractConv {
+ dag El0US1 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+ dag El1US1 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+ dag El0US2 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+ dag El1US2 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+ dag El0SS1 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+ dag El1SS1 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+ dag El0SS2 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+ dag El1SS2 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+ dag BVU = (v4f32 (build_vector El0US1, El1US1, El0US2, El1US2));
+ dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
+}
+
// The following VSX instructions were introduced in Power ISA 2.07
/* FIXME: if the operands are v2i64, these patterns will not match.
we should define new patterns or otherwise match the same patterns
@@ -1452,35 +1473,27 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
} // UseVSXReg = 1
let Predicates = [IsLittleEndian] in {
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
+ def : Pat<DWToSPExtractConv.El0SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1SS1,
(f32 (XSCVSXDSP (COPY_TO_REGCLASS
- (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
+ (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El0US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1US1,
(f32 (XSCVUXDSP (COPY_TO_REGCLASS
- (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+ (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
}
let Predicates = [IsBigEndian] in {
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
- def : Pat<(f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 0)))))),
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S, VSFRC)))>;
- def : Pat<(f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El0SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El0US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+ def : Pat<DWToSPExtractConv.El1US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
}
// Instructions for converting float to i64 feeding a store.
@@ -3814,6 +3827,15 @@ let AddedComplexity = 400 in {
(XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
}
+ let Predicates = [IsBigEndian, HasP8Vector] in {
+ def : Pat<DWToSPExtractConv.BVU,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
+ (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3)))>;
+ def : Pat<DWToSPExtractConv.BVS,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
+ (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
+ }
+
// Big endian, available on all targets with VSX
let Predicates = [IsBigEndian, HasVSX] in {
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3842,6 +3864,15 @@ let AddedComplexity = 400 in {
(v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
}
+ let Predicates = [IsLittleEndian, HasP8Vector] in {
+ def : Pat<DWToSPExtractConv.BVU,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3),
+ (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3)))>;
+ def : Pat<DWToSPExtractConv.BVS,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
+ (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
+ }
+
let Predicates = [IsLittleEndian, HasVSX] in {
// Little endian, available on all targets with VSX
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
OpenPOWER on IntegriCloud