summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp58
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h25
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrVSX.td18
3 files changed, 100 insertions, 1 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 2c4e1901ef2..8da29396b71 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -851,6 +851,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
if (Subtarget.hasFPCVT())
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::LOAD);
@@ -1041,6 +1042,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::MFVSR: return "PPCISD::MFVSR";
case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
+ case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
+ case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
case PPCISD::VCMP: return "PPCISD::VCMP";
@@ -10188,6 +10191,59 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
ShiftCst);
}
+/// Fold a v2f64 BUILD_VECTOR whose two operands are matching [su]int_to_fp
+/// conversions of adjacent i32 lanes (0,1 or 2,3) extracted from a single
+/// v4i32 vector into one PPCISD::SINT_VEC_TO_FP / PPCISD::UINT_VEC_TO_FP node.
+///
+/// \param N   the BUILD_VECTOR node being combined.
+/// \param DCI combiner state; only its SelectionDAG is used here.
+/// \returns the replacement node, or an empty SDValue when the pattern does
+///          not match or VSX is unavailable.
+SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
+                                                 DAGCombinerInfo &DCI) const {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
+         "Should be called with a BUILD_VECTOR node");
+
+  if (N->getValueType(0) != MVT::v2f64 || !Subtarget.hasVSX())
+    return SDValue();
+
+  // Looking for:
+  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
+  const unsigned ConvOpc = N->getOperand(0).getOpcode();
+  if (ConvOpc != ISD::SINT_TO_FP && ConvOpc != ISD::UINT_TO_FP)
+    return SDValue();
+  // Both lanes must be converted with the same signedness.
+  if (N->getOperand(1).getOpcode() != ConvOpc)
+    return SDValue();
+
+  SDValue Ext1 = N->getOperand(0).getOperand(0);
+  SDValue Ext2 = N->getOperand(1).getOperand(0);
+  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+    return SDValue();
+
+  // The lane indices must be compile-time constants.
+  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
+  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
+  if (!Ext1Op || !Ext2Op)
+    return SDValue();
+
+  // Only i32 elements are handled. This check must bail out on its own: it
+  // previously lacked a return and silently guarded the source-vector check
+  // below, so that check was skipped exactly when both extracts were i32.
+  if (Ext1.getValueType() != MVT::i32 || Ext2.getValueType() != MVT::i32)
+    return SDValue();
+
+  // Both lanes must come from the same vector, and that vector must be v4i32,
+  // which is the only source type the [SU]INT_VEC_TO_FP patterns select.
+  SDValue SrcVec = Ext1.getOperand(0);
+  if (SrcVec != Ext2.getOperand(0) || SrcVec.getValueType() != MVT::v4i32)
+    return SDValue();
+
+  // Lanes {0,1} map to subvector 1 on little-endian and 0 on big-endian;
+  // lanes {2,3} map the other way round. Any other lane pair is rejected.
+  const uint64_t FirstElem = Ext1Op->getZExtValue();
+  const uint64_t SecondElem = Ext2Op->getZExtValue();
+  int SubvecIdx;
+  if (FirstElem == 0 && SecondElem == 1)
+    SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
+  else if (FirstElem == 2 && SecondElem == 3)
+    SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
+  else
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc dl(N);
+  const unsigned NodeType = (ConvOpc == ISD::SINT_TO_FP)
+                                ? PPCISD::SINT_VEC_TO_FP
+                                : PPCISD::UINT_VEC_TO_FP;
+  return DAG.getNode(NodeType, dl, MVT::v2f64,
+                     SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
+}
+
+
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
DAGCombinerInfo &DCI) const {
assert((N->getOpcode() == ISD::SINT_TO_FP ||
@@ -11024,6 +11080,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
+ case ISD::BUILD_VECTOR:
+ return DAGCombineBuildVector(N, DCI);
}
return SDValue();
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 5f4795b6ceb..14dfc45bcdc 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -137,6 +137,16 @@ namespace llvm {
/// Direct move from a GPR to a VSX register (zero)
MTVSRZ,
+ /// Extract a subvector from signed integer vector and convert to FP.
+ /// It is primarily used to convert a (widened) illegal integer vector
+ /// type to a legal floating point vector type.
+ /// For example v2i32 -> widened to v4i32 -> v2f64
+ SINT_VEC_TO_FP,
+
+ /// Extract a subvector from unsigned integer vector and convert to FP.
+ /// As with SINT_VEC_TO_FP, used for converting illegal types.
+ UINT_VEC_TO_FP,
+
// FIXME: Remove these once the ANDI glue bug is fixed:
/// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
/// eq or gt bit of CR0 after executing andi. x, 1. This is used to
@@ -432,6 +442,20 @@ namespace llvm {
/// DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
+    /// getPreferredVectorAction - Prefer widening over integer-element
+    /// promotion when legalizing vector types whose element size is a
+    /// multiple of 8 bits. Promotion scalarizes the vector, promotes each
+    /// element and then rebuilds the vector, which often produces much worse
+    /// code. For example, loading a pair of v4i8's and shuffling them turns
+    /// into 8 extending loads, moves back into VSR's (or memory ops if we
+    /// don't have moves) and then the VPERM for the shuffle - a very slow
+    /// sequence. All other types defer to the TargetLoweringBase default.
+    TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
+      const override {
+      const bool EltBitsMultipleOf8 =
+          VT.getVectorElementType().getSizeInBits() % 8 == 0;
+      return EltBitsMultipleOf8
+                 ? TypeWidenVector
+                 : TargetLoweringBase::getPreferredVectorAction(VT);
+    }
bool useSoftFloat() const override;
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
@@ -883,6 +907,7 @@ namespace llvm {
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index bc91fb6874b..f461f2bfcf2 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -57,6 +57,9 @@ def SDT_PPCstxvd2x : SDTypeProfile<0, 2, [
def SDT_PPCxxswapd : SDTypeProfile<1, 1, [
SDTCisSameAs<0, 1>
]>;
+// Type profile with one result and two operands: the result and the first
+// operand are vectors, and the second operand is pointer-typed.
+def SDTVecConv : SDTypeProfile<1, 2, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>
+]>;
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
[SDNPHasChain, SDNPMayLoad]>;
@@ -66,6 +69,8 @@ def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
+// Signed / unsigned vector-to-FP conversion nodes; both use the SDTVecConv
+// type profile and carry no extra node properties.
+def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>;
+def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
string asmstr, InstrItinClass itin, Intrinsic Int,
@@ -608,7 +613,8 @@ let Uses = [RM] in {
"xvcvsxwdp $XT, $XB", IIC_VecFP, []>;
def XVCVSXWSP : XX2Form<60, 184,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvcvsxwsp $XT, $XB", IIC_VecFP, []>;
+ "xvcvsxwsp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (sint_to_fp v4i32:$XB))]>;
def XVCVUXDDP : XX2Form<60, 488,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvuxddp $XT, $XB", IIC_VecFP,
@@ -928,6 +934,16 @@ def : Pat<(sext_inreg v2i64:$C, v2i32),
def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))),
(XVCVSXWDP (XXSLDWI $C, $C, 1))>;
+// Select the vector-to-FP conversion nodes on a v4i32 source. A second
+// operand of 0 merges the source with itself using XXMRGHW, a second operand
+// of 1 uses XXMRGLW; the merged value is then converted to v2f64.
+// Signed conversions (XVCVSXWDP):
+def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)),
+ (v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>;
+def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 1)),
+ (v2f64 (XVCVSXWDP (v2i64 (XXMRGLW $C, $C))))>;
+
+// Unsigned conversions (XVCVUXWDP):
+def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)),
+ (v2f64 (XVCVUXWDP (v2i64 (XXMRGHW $C, $C))))>;
+def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
+ (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;
+
+
// Loads.
def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
OpenPOWER on IntegriCloud