From 1ac6e9636c9e8df2516bf4106d1664af7448dbf4 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Fri, 10 May 2019 14:04:06 +0000 Subject: [PowerPC] custom lower `v2f64 fpext v2f32` Reduces scalarization overhead via custom lowering of v2f64 fpext v2f32. eg. For the following IR %0 = load <2 x float>, <2 x float>* %Ptr, align 8 %1 = fpext <2 x float> %0 to <2 x double> ret <2 x double> %1 Pre custom lowering: ld r3, 0(r3) mtvsrd f0, r3 xxswapd vs34, vs0 xscvspdpn f0, vs0 xxsldwi vs1, vs34, vs34, 3 xscvspdpn f1, vs1 xxmrghd vs34, vs0, vs1 After custom lowering: lfd f0, 0(r3) xxmrghw vs0, vs0, vs0 xvcvspdp vs34, vs0 Differential Revision: https://reviews.llvm.org/D57857 llvm-svn: 360429 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 57 +++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp') diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 9ff817e2f50..79be3265892 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -877,6 +877,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FPOWI, MVT::f128, Expand); setOperationAction(ISD::FREM, MVT::f128, Expand); } + setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); } @@ -1378,6 +1379,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; + case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH"; + case PPCISD::FP_EXTEND_LH: return "PPCISD::FP_EXTEND_LH"; } return nullptr; } @@ -9608,6 +9611,59 @@ SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT); } +// Custom lowering for fpext vf32 to v2f64 +SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + + assert(Op.getOpcode() == ISD::FP_EXTEND && + "Should only be called for ISD::FP_EXTEND"); + + // We only want to custom lower an extend from v2f32 to v2f64. + if (Op.getValueType() != MVT::v2f64 || + Op.getOperand(0).getValueType() != MVT::v2f32) + return SDValue(); + + SDLoc dl(Op); + SDValue Op0 = Op.getOperand(0); + + switch (Op0.getOpcode()) { + default: + return SDValue(); + case ISD::FADD: + case ISD::FMUL: + case ISD::FSUB: { + SDValue NewLoad[2]; + for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) { + // Ensure both input are loads. + SDValue LdOp = Op0.getOperand(i); + if (LdOp.getOpcode() != ISD::LOAD) + return SDValue(); + // Generate new load node. + LoadSDNode *LD = cast(LdOp); + SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() }; + NewLoad[i] = + DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl, + DAG.getVTList(MVT::v4f32, MVT::Other), + LoadOps, LD->getMemoryVT(), + LD->getMemOperand()); + } + SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, + NewLoad[0], NewLoad[1], + Op0.getNode()->getFlags()); + return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewOp); + } + case ISD::LOAD: { + LoadSDNode *LD = cast(Op0); + SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() }; + SDValue NewLd = + DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl, + DAG.getVTList(MVT::v4f32, MVT::Other), + LoadOps, LD->getMemoryVT(), LD->getMemOperand()); + return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewLd); + } + } + llvm_unreachable("ERROR:Should return for all cases within swtich."); +} + /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -9661,6 +9717,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); case ISD::ABS: return LowerABS(Op, DAG); + case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); // For counter-based loop handling. case ISD::INTRINSIC_W_CHAIN: return SDValue(); -- cgit v1.2.3