diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 49 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 12 |
2 files changed, 44 insertions, 17 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c0e8a72d1f9..9c0b5185017 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5601,6 +5601,24 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, } } + auto CreateLoad = [&DAG, &DL](EVT VT, LoadSDNode *LDBase) { + SDValue NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), + LDBase->getBasePtr(), LDBase->getPointerInfo(), + LDBase->isVolatile(), LDBase->isNonTemporal(), + LDBase->isInvariant(), LDBase->getAlignment()); + + if (LDBase->hasAnyUseOfValue(1)) { + SDValue NewChain = + DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1), + SDValue(NewLd.getNode(), 1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain); + DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1), + SDValue(NewLd.getNode(), 1)); + } + + return NewLd; + }; + // LOAD - all consecutive load/undefs (must start/end with a load). // If we have found an entire vector of loads and undefs, then return a large // load of the entire vector width starting at the base pointer. @@ -5616,23 +5634,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT)) return SDValue(); - SDValue NewLd = SDValue(); - - NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), - LDBase->getPointerInfo(), LDBase->isVolatile(), - LDBase->isNonTemporal(), LDBase->isInvariant(), - LDBase->getAlignment()); - - if (LDBase->hasAnyUseOfValue(1)) { - SDValue NewChain = - DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1), - SDValue(NewLd.getNode(), 1)); - DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain); - DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1), - SDValue(NewLd.getNode(), 1)); - } - - return NewLd; + return CreateLoad(VT, LDBase); } int LoadSize = @@ -5667,6 +5669,19 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, return DAG.getBitcast(VT, ResNode); } + + // VZEXT_MOVL - consecutive 32-bit load/undefs followed by zeros/undefs. + if (IsConsecutiveLoad && FirstLoadedElt == 0 && LoadSize == 32 && + ((VT.is128BitVector() && TLI.isTypeLegal(MVT::v4i32)) || + (VT.is256BitVector() && TLI.isTypeLegal(MVT::v8i32)) || + (VT.is512BitVector() && TLI.isTypeLegal(MVT::v16i32)))) { + MVT VecVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32); + SDValue V = CreateLoad(MVT::i32, LDBase); + V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, V); + V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, VecVT, V); + return DAG.getBitcast(VT, V); + } + return SDValue(); } diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 368651935a3..4b61edde262 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -3046,6 +3046,18 @@ let Predicates = [HasAVX512] in { def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>; + + // Represent the same patterns above but in the form they appear for + // 512-bit types + def : Pat<(v16i32 (X86vzmovl (insert_subvector undef, + (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), + (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>; + def : Pat<(v16f32 (X86vzmovl (insert_subvector undef, + (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))), + (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; + def : Pat<(v8f64 (X86vzmovl (insert_subvector undef, + (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))), + (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>; } def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))), |