Diffstat (limited to 'llvm')
 llvm/include/llvm/Target/TargetLowering.h        |  10
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp    |  19
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp |  17
 llvm/lib/Target/X86/X86ISelLowering.cpp          | 111
 llvm/test/CodeGen/X86/dagcombine-buildvector.ll  |  16
5 files changed, 114 insertions, 59 deletions
diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h
index 163f4c5ae50..ef166a26c55 100644
--- a/llvm/include/llvm/Target/TargetLowering.h
+++ b/llvm/include/llvm/Target/TargetLowering.h
@@ -825,11 +825,11 @@ public:
   virtual bool
   isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const;
 
-  /// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is
-  /// loading 'Bytes' bytes from a location that is 'Dist' units away from the
-  /// location that the 'Base' load is loading from.
-  bool isConsecutiveLoad(SDNode *LD, SDNode *Base, unsigned Bytes, int Dist,
-                         const MachineFrameInfo *MFI) const;
+  /// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+  /// location that is 'Dist' units away from the location that the 'Base' load
+  /// is loading from.
+  bool isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes,
+                         int Dist, const MachineFrameInfo *MFI) const;
 
   /// PerformDAGCombine - This method will be invoked for all target nodes and
   /// for any target-independent nodes that the target has registered with
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5d1b2a3ea28..609ec82c5ad 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3626,30 +3626,29 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) {
   assert(N->getOpcode() == ISD::BUILD_PAIR);
 
-  SDNode *LD1 = getBuildPairElt(N, 0);
-  if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
     return SDValue();
   MVT LD1VT = LD1->getValueType(0);
-  SDNode *LD2 = getBuildPairElt(N, 1);
   const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
 
   if (ISD::isNON_EXTLoad(LD2) &&
       LD2->hasOneUse() &&
       // If both are volatile this would reduce the number of volatile loads.
       // If one is volatile it might be ok, but play conservative and bail out.
-      !cast<LoadSDNode>(LD1)->isVolatile() &&
-      !cast<LoadSDNode>(LD2)->isVolatile() &&
+      !LD1->isVolatile() &&
+      !LD2->isVolatile() &&
       TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) {
-    LoadSDNode *LD = cast<LoadSDNode>(LD1);
-    unsigned Align = LD->getAlignment();
+    unsigned Align = LD1->getAlignment();
     unsigned NewAlign = TLI.getTargetData()->
       getABITypeAlignment(VT.getTypeForMVT());
 
     if (NewAlign <= Align &&
         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
-      return DAG.getLoad(VT, N->getDebugLoc(), LD->getChain(), LD->getBasePtr(),
-                         LD->getSrcValue(), LD->getSrcValueOffset(),
-                         false, Align);
+      return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
+                         LD1->getBasePtr(), LD1->getSrcValue(),
+                         LD1->getSrcValueOffset(), false, Align);
   }
 
   return SDValue();
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3334e53f0fb..ab4cd515531 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2070,13 +2070,13 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
 }
 
 
-/// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is
-/// loading 'Bytes' bytes from a location that is 'Dist' units away from the
-/// location that the 'Base' load is loading from.
-bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base,
-                                       unsigned Bytes, int Dist,
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+/// location that is 'Dist' units away from the location that the 'Base' load
+/// is loading from.
+bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
+                                       unsigned Bytes, int Dist,
                                        const MachineFrameInfo *MFI) const {
-  if (LD->getOperand(0).getNode() != Base->getOperand(0).getNode())
+  if (LD->getChain() != Base->getChain())
     return false;
   MVT VT = LD->getValueType(0);
   if (VT.getSizeInBits() / 8 != Bytes)
@@ -2094,6 +2094,11 @@ bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base,
     if (FS != BFS || FS != (int)Bytes) return false;
     return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
   }
+  if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
+    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
+    if (V && (V->getSExtValue() == Dist*Bytes))
+      return true;
+  }
 
   GlobalValue *GV1 = NULL;
   GlobalValue *GV2 = NULL;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 924155c4505..77c9f3d02a6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7675,8 +7675,9 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
     if (Elt.getOpcode() == ISD::UNDEF)
       continue;
 
-    if (!TLI.isConsecutiveLoad(Elt.getNode(), Base,
-                               EVT.getSizeInBits()/8, i, MFI))
+    LoadSDNode *LD = cast<LoadSDNode>(Elt);
+    LoadSDNode *LDBase = cast<LoadSDNode>(Base);
+    if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI))
       return false;
   }
   return true;
@@ -7751,44 +7752,82 @@ static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
 
   MVT VT = N->getValueType(0);
   MVT EVT = VT.getVectorElementType();
-  if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
-    // We are looking for load i64 and zero extend. We want to transform
-    // it before legalizer has a chance to expand it. Also look for i64
-    // BUILD_PAIR bit casted to f64.
-    return SDValue();
-  // This must be an insertion into a zero vector.
-  SDValue HighElt = N->getOperand(1);
-  if (!isZeroNode(HighElt))
-    return SDValue();
+
+  // Before or during type legalization, we want to try and convert a
+  // build_vector of an i64 load and a zero value into vzext_movl before the
+  // legalizer can break it up.
+  // FIXME: does the case below remove the need to do this?
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) {
+    if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
+      return SDValue();
+
+    // This must be an insertion into a zero vector.
+    SDValue HighElt = N->getOperand(1);
+    if (!isZeroNode(HighElt))
+      return SDValue();
+
+    // Value must be a load.
+    SDNode *Base = N->getOperand(0).getNode();
+    if (!isa<LoadSDNode>(Base)) {
+      if (Base->getOpcode() != ISD::BIT_CONVERT)
+        return SDValue();
+      Base = Base->getOperand(0).getNode();
+      if (!isa<LoadSDNode>(Base))
+        return SDValue();
+    }
+
+    // Transform it into VZEXT_LOAD addr.
+    LoadSDNode *LD = cast<LoadSDNode>(Base);
+
+    // Load must not be an extload.
+    if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+      return SDValue();
+
+    // Load type should legal type so we don't have to legalize it.
+    if (!TLI.isTypeLegal(VT))
+      return SDValue();
+
+    SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+    SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
+    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+    TargetLowering::TargetLoweringOpt TLO(DAG);
+    TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
+    DCI.CommitTargetLoweringOpt(TLO);
+    return ResNode;
+  }
+
+  // The type legalizer will have broken apart v2i64 build_vector created during
+  // widening before the code which handles that case is run.  Look for build
+  // vector (load, load + 4, 0/undef, 0/undef)
+  if (VT == MVT::v4i32 || VT == MVT::v4f32) {
+    LoadSDNode *LD0 = dyn_cast<LoadSDNode>(N->getOperand(0));
+    LoadSDNode *LD1 = dyn_cast<LoadSDNode>(N->getOperand(1));
+    if (!LD0 || !LD1)
+      return SDValue();
+    if (LD0->getExtensionType() != ISD::NON_EXTLOAD ||
+        LD1->getExtensionType() != ISD::NON_EXTLOAD)
+      return SDValue();
+    // Make sure the second elt is a consecutive load.
+    if (!TLI.isConsecutiveLoad(LD1, LD0, EVT.getSizeInBits()/8, 1,
+                               DAG.getMachineFunction().getFrameInfo()))
+      return SDValue();
 
-  // Value must be a load.
-  SDNode *Base = N->getOperand(0).getNode();
-  if (!isa<LoadSDNode>(Base)) {
-    if (Base->getOpcode() != ISD::BIT_CONVERT)
+    SDValue N2 = N->getOperand(2);
+    SDValue N3 = N->getOperand(3);
+    if (!isZeroNode(N2) && N2.getOpcode() != ISD::UNDEF)
       return SDValue();
-    Base = Base->getOperand(0).getNode();
-    if (!isa<LoadSDNode>(Base))
+    if (!isZeroNode(N3) && N3.getOpcode() != ISD::UNDEF)
       return SDValue();
+
+    SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+    SDValue Ops[] = { LD0->getChain(), LD0->getBasePtr() };
+    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+    TargetLowering::TargetLoweringOpt TLO(DAG);
+    TLO.CombineTo(SDValue(LD0, 1), ResNode.getValue(1));
+    DCI.CommitTargetLoweringOpt(TLO);
+    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
   }
-
-  // Transform it into VZEXT_LOAD addr.
-  LoadSDNode *LD = cast<LoadSDNode>(Base);
-
-  // Load must not be an extload.
-  if (LD->getExtensionType() != ISD::NON_EXTLOAD)
-    return SDValue();
-
-  // Load type should legal type so we don't have to legalize it.
-  if (!TLI.isTypeLegal(VT))
-    return SDValue();
-
-  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
-  SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
-  SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
-  TargetLowering::TargetLoweringOpt TLO(DAG);
-  TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
-  DCI.CommitTargetLoweringOpt(TLO);
-  return ResNode;
+  return SDValue();
 }
 
 /// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
diff --git a/llvm/test/CodeGen/X86/dagcombine-buildvector.ll b/llvm/test/CodeGen/X86/dagcombine-buildvector.ll
index c89a296d0db..b96fdfc03c6 100644
--- a/llvm/test/CodeGen/X86/dagcombine-buildvector.ll
+++ b/llvm/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -1,13 +1,25 @@
-; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn -disable-mmx -o %t -f
 ; RUN: grep unpcklpd %t | count 1
 ; RUN: grep movapd %t | count 1
+; RUN: grep movaps %t | count 1
 
 ; Shows a dag combine bug that will generate an illegal build vector
 ; with v2i64 build_vector i32, i32.
 
-define void @test(<2 x double>* %dst, <4 x double> %src) {
+define void @test(<2 x double>* %dst, <4 x double> %src) nounwind {
 entry:
         %tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> < i32 0, i32 2 >
         store <2 x double> %tmp7.i, <2 x double>* %dst
         ret void
 }
+
+define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind {
+entry:
+        %tmp1 = load <4 x i16>* %src
+        %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+        %0 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)
+        store <4 x i32> %0, <4 x i32>* %dest
+        ret void
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
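Note (illustration, not part of the commit): the comment in PerformBuildVectorCombine above describes the pattern "build vector (load, load + 4, 0/undef, 0/undef)". A hypothetical reduced .ll input of that shape is sketched below; the function and value names are invented, and whether source like this reaches the combine as a single build_vector depends on earlier combines. The address of %b is (add %p, 4), which the new ISD::ADD case in isConsecutiveLoad() accepts because the offset equals Dist*Bytes = 1*4, so the two scalar loads plus zero upper elements can fold into a single zero-extending 64-bit vector load (X86ISD::VZEXT_LOAD, a movq).

; Hypothetical reduced input (invented for illustration).
define <4 x i32> @example(i32* %p) nounwind {
entry:
        %q = getelementptr i32* %p, i32 1       ; %q = %p + 4 bytes
        %a = load i32* %p                       ; element 0
        %b = load i32* %q                       ; element 1, consecutive with %a
        %v0 = insertelement <4 x i32> zeroinitializer, i32 %a, i32 0
        %v1 = insertelement <4 x i32> %v0, i32 %b, i32 1
        ret <4 x i32> %v1                       ; upper two elements stay zero
}

Such a file would be driven the same way as the updated RUN line above: llvm-as < file.ll | llc -march=x86 -mcpu=penryn -disable-mmx.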

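Note (illustration, not part of the commit): a hypothetical negative case for the same check. Here the second load sits 8 bytes past the first rather than the 4 bytes (Dist*Bytes = 1*4) that isConsecutiveLoad() requires for i32 elements, so the new ISD::ADD case rejects it and the build_vector is left to normal lowering.

; Hypothetical non-consecutive input (invented for illustration).
define <4 x i32> @no_fold(i32* %p) nounwind {
entry:
        %q = getelementptr i32* %p, i32 2       ; %q = %p + 8 bytes: one i32 gap
        %a = load i32* %p
        %b = load i32* %q                       ; not adjacent to %a, no fold
        %v0 = insertelement <4 x i32> zeroinitializer, i32 %a, i32 0
        %v1 = insertelement <4 x i32> %v0, i32 %b, i32 1
        ret <4 x i32> %v1
}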
