diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 57 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.h | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrVSX.td | 19 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/reduce_scalarization.ll | 77 | 
4 files changed, 161 insertions, 0 deletions
| diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 9ff817e2f50..79be3265892 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -877,6 +877,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,          setOperationAction(ISD::FPOWI, MVT::f128, Expand);          setOperationAction(ISD::FREM, MVT::f128, Expand);        } +      setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);      } @@ -1378,6 +1379,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {    case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";    case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";    case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI"; +  case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH"; +  case PPCISD::FP_EXTEND_LH:    return "PPCISD::FP_EXTEND_LH";    }    return nullptr;  } @@ -9608,6 +9611,59 @@ SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {    return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);  } +// Custom lowering for fpext v2f32 to v2f64 +SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + +  assert(Op.getOpcode() == ISD::FP_EXTEND && +         "Should only be called for ISD::FP_EXTEND"); + +  // We only want to custom lower an extend from v2f32 to v2f64. +  if (Op.getValueType() != MVT::v2f64 || +      Op.getOperand(0).getValueType() != MVT::v2f32) +    return SDValue(); + +  SDLoc dl(Op); +  SDValue Op0 = Op.getOperand(0); + +  switch (Op0.getOpcode()) { +  default: +    return SDValue(); +  case ISD::FADD: +  case ISD::FMUL: +  case ISD::FSUB: { +    SDValue NewLoad[2]; +    for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) { +      // Ensure both inputs are loads. +      SDValue LdOp = Op0.getOperand(i); +      if (LdOp.getOpcode() != ISD::LOAD) +        return SDValue(); +      // Generate new load node. 
+      LoadSDNode *LD = cast<LoadSDNode>(LdOp); +      SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() }; +      NewLoad[i] = +        DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl, +                                DAG.getVTList(MVT::v4f32, MVT::Other), +                                LoadOps, LD->getMemoryVT(), +                                LD->getMemOperand()); +    } +    SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, +                              NewLoad[0], NewLoad[1], +                              Op0.getNode()->getFlags()); +    return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewOp); +  } +  case ISD::LOAD: { +    LoadSDNode *LD = cast<LoadSDNode>(Op0); +    SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() }; +    SDValue NewLd = +      DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl, +                              DAG.getVTList(MVT::v4f32, MVT::Other), +                              LoadOps, LD->getMemoryVT(), LD->getMemOperand()); +    return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewLd); +  } +  } +  llvm_unreachable("ERROR: Should return for all cases within switch."); +} +  /// LowerOperation - Provide custom lowering hooks for some operations.  ///  SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -9661,6 +9717,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {    case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);    case ISD::MUL:                return LowerMUL(Op, DAG);    case ISD::ABS:                return LowerABS(Op, DAG); +  case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);    // For counter-based loop handling.    
case ISD::INTRINSIC_W_CHAIN:  return SDValue(); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 208306c577e..cf812a25085 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -404,6 +404,9 @@ namespace llvm {        /// representation.        QBFLT, +      /// Custom extend v4f32 to v2f64. +      FP_EXTEND_LH, +        /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a        /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of        /// the GPRC input, then stores it through Ptr.  Type can be either i16 or @@ -445,6 +448,10 @@ namespace llvm {        /// an xxswapd.        LXVD2X, +      /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a +      /// v2f32 value into the lower half of a VSR register. +      LD_VSX_LH, +        /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.        /// Maps directly to an stxvd2x instruction that will be preceded by        /// an xxswapd. 
@@ -1021,6 +1028,7 @@ namespace llvm {      SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; +    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index d874f30c7e8..6b97435527a 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -53,6 +53,15 @@ def PPCRegSPILLTOVSRRCAsmOperand : AsmOperandClass {  def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> {    let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand;  } + +def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [ +  SDTCisVT<0, v4f32>, SDTCisPtrTy<1> +]>; + +def SDT_PPCfpextlh : SDTypeProfile<1, 1, [ +  SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32> +]>; +  // Little-endian-specific nodes.  
def SDT_PPClxvd2x : SDTypeProfile<1, 1, [    SDTCisVT<0, v2f64>, SDTCisPtrTy<1> @@ -84,6 +93,10 @@ def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;  def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;  def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>; +def PPCfpextlh : SDNode<"PPCISD::FP_EXTEND_LH", SDT_PPCfpextlh, []>; +def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh, +                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +  multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,                      string asmstr, InstrItinClass itin, Intrinsic Int,                      ValueType OutTy, ValueType InTy> { @@ -1060,6 +1073,8 @@ def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)),  def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),            (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>; +def : Pat<(v2f64 (PPCfpextlh v4f32:$C)), (XVCVSPDP (XXMRGHW $C, $C))>; +  // Loads.  let Predicates = [HasVSX, HasOnlySwappingMemOps] in {    def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>; @@ -3266,6 +3281,10 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {    def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))),              (f32 (DFLOADf32 ixaddr:$src))>; +  def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)), +            (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>; +  def : Pat<(v4f32 (PPCldvsxlh ixaddr:$src)), +            (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC)>;    let AddedComplexity = 400 in {    // The following pseudoinstructions are used to ensure the utilization diff --git a/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll new file mode 100644 index 00000000000..cbc8a936772 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/reduce_scalarization.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \ +; RUN: 
    -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN:     -ppc-vsr-nums-as-vr < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \ +; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN:     -ppc-vsr-nums-as-vr < %s | FileCheck %s + +; Function Attrs: norecurse nounwind readonly +define dso_local <2 x double> @test1(<2 x float>* nocapture readonly %Ptr) { +; CHECK-LABEL: test1: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    lfd f0, 0(r3) +; CHECK-NEXT:    xxmrghw vs0, vs0, vs0 +; CHECK-NEXT:    xvcvspdp v2, vs0 +; CHECK-NEXT:    blr +entry: +  %0 = load <2 x float>, <2 x float>* %Ptr, align 8 +  %1 = fpext <2 x float> %0 to <2 x double> +  ret <2 x double> %1 +} + +; Function Attrs: norecurse nounwind readonly +define dso_local <2 x double> @test2(<2 x float>* nocapture readonly %a, <2 x float>* nocapture readonly %b) { +; CHECK-LABEL: test2: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    lfd f0, 0(r4) +; CHECK-NEXT:    lfd f1, 0(r3) +; CHECK-NEXT:    xvsubsp vs0, vs1, vs0 +; CHECK-NEXT:    xxmrghw vs0, vs0, vs0 +; CHECK-NEXT:    xvcvspdp v2, vs0 +; CHECK-NEXT:    blr +entry: +  %0 = load <2 x float>, <2 x float>* %a, align 8 +  %1 = load <2 x float>, <2 x float>* %b, align 8 +  %sub = fsub <2 x float> %0, %1 +  %2 = fpext <2 x float> %sub to <2 x double> +  ret <2 x double> %2 +} + +; Function Attrs: norecurse nounwind readonly +; Function Attrs: norecurse nounwind readonly +define dso_local <2 x double> @test3(<2 x float>* nocapture readonly %a, <2 x float>* nocapture readonly %b) { +; CHECK-LABEL: test3: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    lfd f0, 0(r4) +; CHECK-NEXT:    lfd f1, 0(r3) +; CHECK-NEXT:    xvaddsp vs0, vs1, vs0 +; CHECK-NEXT:    xxmrghw vs0, vs0, vs0 +; CHECK-NEXT:    xvcvspdp v2, vs0 +; CHECK-NEXT:    blr +entry: +  %0 = load <2 x float>, <2 x float>* %a, align 8 +  %1 = load <2 x float>, <2 x float>* %b, align 8 +  %sub = fadd <2 x float> %0, %1 +  %2 = fpext <2 x float> %sub to <2 x 
double> +  ret <2 x double> %2 +} + +; Function Attrs: norecurse nounwind readonly +; Function Attrs: norecurse nounwind readonly +define dso_local <2 x double> @test4(<2 x float>* nocapture readonly %a, <2 x float>* nocapture readonly %b) { +; CHECK-LABEL: test4: +; CHECK:       # %bb.0: # %entry +; CHECK-NEXT:    lfd f0, 0(r4) +; CHECK-NEXT:    lfd f1, 0(r3) +; CHECK-NEXT:    xvmulsp vs0, vs1, vs0 +; CHECK-NEXT:    xxmrghw vs0, vs0, vs0 +; CHECK-NEXT:    xvcvspdp v2, vs0 +; CHECK-NEXT:    blr +entry: +  %0 = load <2 x float>, <2 x float>* %a, align 8 +  %1 = load <2 x float>, <2 x float>* %b, align 8 +  %sub = fmul <2 x float> %0, %1 +  %2 = fpext <2 x float> %sub to <2 x double> +  ret <2 x double> %2 +} | 

