| field | value | date |
|---|---|---|
| author | Michael Liao <michael.liao@intel.com> | 2012-08-14 21:24:47 +0000 |
| committer | Michael Liao <michael.liao@intel.com> | 2012-08-14 21:24:47 +0000 |
| commit | 34107b91779520fa4242e027011b24f377bb7f8c (patch) | |
| tree | ad2f11911a7f0a7efa4e02a62303af7582e1a292 /llvm/lib/Target | |
| parent | 25e8fe6ae04af1b2381b138e59619fb5676dbe4c (diff) | |
| download | bcm5719-llvm-34107b91779520fa4242e027011b24f377bb7f8c.tar.gz, bcm5719-llvm-34107b91779520fa4242e027011b24f377bb7f8c.zip | |
fix PR11334
- FP_EXTEND only supports extending from vectors with a matching number of
  elements. This forces the extension from v2f32 to v2f64 to be scalarized,
  because v2f32 is legalized to v4f32, which no longer matches v2f64.
- Add an X86-specific VFPEXT node supporting extension from v4f32 to v2f64.
- Add a BUILD_VECTOR lowering helper to recover the original v4f32-to-v2f64
  extension from its scalarized form.
- Enhance the test case to cover different vector widths (see the IR sketch
  below).
llvm-svn: 161894
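For context, a minimal IR reproducer for the problem described above (a sketch only; the function and value names are illustrative, not copied from the commit's test case):

```llvm
; Extending <2 x float> to <2 x double>. Before this patch, v2f32 was
; widened to v4f32 during type legalization, which no longer satisfied
; ISD::FP_EXTEND's matching-element-count constraint, so the extend was
; scalarized. With X86ISD::VFPEXT it should select to a single
; (v)cvtps2pd instead.
define <2 x double> @fpext_v2f32(<2 x float> %v) nounwind {
entry:
  %ext = fpext <2 x float> %v to <2 x double>
  ret <2 x double> %ext
}
```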
Diffstat (limited to 'llvm/lib/Target')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 81 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 5 |
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 5 |
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 8 |
4 files changed, 99 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ea66a6115d7..648637229f6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5114,6 +5114,82 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
+// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64
+// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the
+// constraint of matching input/output vector elements.
+SDValue
+X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  SDNode *N = Op.getNode();
+  EVT VT = Op.getValueType();
+  unsigned NumElts = Op.getNumOperands();
+
+  // Check supported types and sub-targets.
+  //
+  // Only v2f32 -> v2f64 needs special handling.
+  if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
+    return SDValue();
+
+  SDValue VecIn;
+  EVT VecInVT;
+  SmallVector<int, 8> Mask;
+  EVT SrcVT = MVT::Other;
+
+  // Check the patterns could be translated into X86vfpext.
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue In = N->getOperand(i);
+    unsigned Opcode = In.getOpcode();
+
+    // Skip if the element is undefined.
+    if (Opcode == ISD::UNDEF) {
+      Mask.push_back(-1);
+      continue;
+    }
+
+    // Quit if one of the elements is not defined from 'fpext'.
+    if (Opcode != ISD::FP_EXTEND)
+      return SDValue();
+
+    // Check how the source of 'fpext' is defined.
+    SDValue L2In = In.getOperand(0);
+    EVT L2InVT = L2In.getValueType();
+
+    // Check the original type
+    if (SrcVT == MVT::Other)
+      SrcVT = L2InVT;
+    else if (SrcVT != L2InVT) // Quit if non-homogenous typed.
+      return SDValue();
+
+    // Check whether the value being 'fpext'ed is extracted from the same
+    // source.
+    Opcode = L2In.getOpcode();
+
+    // Quit if it's not extracted with a constant index.
+    if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(L2In.getOperand(1)))
+      return SDValue();
+
+    SDValue ExtractedFromVec = L2In.getOperand(0);
+
+    if (VecIn.getNode() == 0) {
+      VecIn = ExtractedFromVec;
+      VecInVT = ExtractedFromVec.getValueType();
+    } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
+      return SDValue();
+
+    Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
+  }
+
+  // Fill the remaining mask as undef.
+  for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
+    Mask.push_back(-1);
+
+  return DAG.getNode(X86ISD::VFPEXT, DL, VT,
+                     DAG.getVectorShuffle(VecInVT, DL,
+                                          VecIn, DAG.getUNDEF(VecInVT),
+                                          &Mask[0]));
+}
+
 SDValue
 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
@@ -5146,6 +5222,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   if (Broadcast.getNode())
     return Broadcast;
 
+  SDValue FpExt = LowerVectorFpExtend(Op, DAG);
+  if (FpExt.getNode())
+    return FpExt;
+
   unsigned EVTBits = ExtVT.getSizeInBits();
 
   unsigned NumZero  = 0;
@@ -11343,6 +11423,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::ATOMNAND64_DAG:     return "X86ISD::ATOMNAND64_DAG";
   case X86ISD::VZEXT_MOVL:         return "X86ISD::VZEXT_MOVL";
   case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
+  case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
   case X86ISD::VSHLDQ:             return "X86ISD::VSHLDQ";
   case X86ISD::VSRLDQ:             return "X86ISD::VSRLDQ";
   case X86ISD::VSHL:               return "X86ISD::VSHL";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 9123ebd8ae4..c8a04c02d07 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -227,6 +227,9 @@ namespace llvm {
       // VSEXT_MOVL - Vector move low and sign extend.
       VSEXT_MOVL,
 
+      // VFPEXT - Vector FP extend.
+      VFPEXT,
+
       // VSHL, VSRL - 128-bit vector logical left / right shift
       VSHLDQ, VSRLDQ,
 
@@ -828,6 +831,8 @@ namespace llvm {
     SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const;
     SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
 
+    SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const;
+
     virtual SDValue
       LowerFormalArguments(SDValue Chain,
                            CallingConv::ID CallConv, bool isVarArg,
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index d13167bb05d..1db68c86b76 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -81,6 +81,11 @@ def X86vsmovl  : SDNode<"X86ISD::VSEXT_MOVL",
 def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def X86vfpext  : SDNode<"X86ISD::VFPEXT",
+                        SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+                                             SDTCisFP<0>, SDTCisFP<1>]>>;
+
 def X86vshldq  : SDNode<"X86ISD::VSHLDQ",    SDTIntShiftOp>;
 def X86vshrdq  : SDNode<"X86ISD::VSRLDQ",    SDTIntShiftOp>;
 def X86cmpp    : SDNode<"X86ISD::CMPP",      SDTX86VFCMP>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index e4c35b9bc55..20dc81eb4a3 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2101,12 +2101,20 @@ let Predicates = [HasAVX] in {
   def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
             (VCVTPD2PSYrm addr:$src)>;
 
+  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+            (VCVTPS2PDrr VR128:$src)>;
   def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
             (VCVTPS2PDYrr VR128:$src)>;
   def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
             (VCVTPS2PDYrm addr:$src)>;
 }
 
+let Predicates = [HasSSE2] in {
+  // Match fextend for 128 conversions
+  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+            (CVTPS2PDrr VR128:$src)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Compare Instructions
 //===----------------------------------------------------------------------===//
```
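To make the BUILD_VECTOR helper concrete, this is the scalarized shape it recognizes, written as IR (a sketch only; the actual matching runs on SelectionDAG nodes after legalization, and the names here are illustrative):

```llvm
; Every defined lane is an fpext of an extractelement taken from one
; common source vector with a constant index -- the per-element checks
; in LowerVectorFpExtend. The helper rebuilds this as
;   X86ISD::VFPEXT(shuffle(%src, undef, <0,1,u,u>))
; which the SSE2/AVX patterns above select to (V)CVTPS2PDrr.
define <2 x double> @scalarized_ext(<4 x float> %src) nounwind {
entry:
  %e0 = extractelement <4 x float> %src, i32 0
  %e1 = extractelement <4 x float> %src, i32 1
  %x0 = fpext float %e0 to double
  %x1 = fpext float %e1 to double
  %v0 = insertelement <2 x double> undef, double %x0, i32 0
  %v1 = insertelement <2 x double> %v0, double %x1, i32 1
  ret <2 x double> %v1
}
```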

