Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 177
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 16
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 36
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 63
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 13
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 8
-rw-r--r-- | llvm/lib/CodeGen/TargetLoweringBase.cpp | 2
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 27
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 118
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 74
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrMVE.td | 118
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 44
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 35
13 files changed, 518 insertions, 213 deletions
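
Note: the diff below adds an offset operand and an addressing mode to the SelectionDAG masked load/store nodes so that pre/post-indexed forms can be formed by DAGCombiner and selected (primarily for ARM MVE). As a rough sketch of the updated API only — the names DAG, DL, Chain, Ptr, Mask, PassThru, VT, MMO and Inc are placeholders assumed to be in scope, not code from this patch — a caller builds the unindexed form and the combiner can later re-create it as a post-indexed masked load:

    // Sketch: unindexed masked load under the new signature — an undef offset
    // operand plus ISD::UNINDEXED, matching the SelectionDAGBuilder change.
    SDValue Undef = DAG.getUNDEF(Ptr.getValueType());
    SDValue Load = DAG.getMaskedLoad(VT, DL, Chain, Ptr, /*Offset=*/Undef, Mask,
                                     PassThru, /*MemVT=*/VT, MMO, ISD::UNINDEXED,
                                     ISD::NON_EXTLOAD, /*IsExpanding=*/false);
    // The combine can then rebuild it as a post-indexed masked load; result 1
    // of the indexed node is the updated base pointer, result 2 the chain.
    SDValue Indexed = DAG.getIndexedMaskedLoad(Load, DL, Ptr, /*Offset=*/Inc,
                                               ISD::POST_INC);
    SDValue NewBase = Indexed.getValue(1);

That updated base pointer is what CombineToPre/PostIndexedLoadStore substitutes for the original add/sub of the pointer, exactly as it already does for ordinary loads and stores.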
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 793352c16d3..e6844e556b1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8724,6 +8724,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return Chain; + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + return SDValue(); } @@ -8748,6 +8752,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return CombineTo(N, MLD->getPassThru(), MLD->getChain()); + // Try transforming N to an indexed load. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + return SDValue(); } @@ -9506,11 +9514,10 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, SDLoc dl(Ld); SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru()); - SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(), - Ld->getBasePtr(), Ld->getMask(), - PassThru, Ld->getMemoryVT(), - Ld->getMemOperand(), ExtLoadType, - Ld->isExpandingLoad()); + SDValue NewLoad = DAG.getMaskedLoad( + VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(), + PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(), + ExtLoadType, Ld->isExpandingLoad()); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1)); return NewLoad; } @@ -13612,12 +13619,22 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, EVT VT; unsigned AS; - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { if (LD->isIndexed() || LD->getBasePtr().getNode() != N) return false; VT = LD->getMemoryVT(); AS = LD->getAddressSpace(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { + if (ST->isIndexed() || ST->getBasePtr().getNode() != N) + return false; + VT = ST->getMemoryVT(); + AS = ST->getAddressSpace(); + } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) { + if (LD->isIndexed() || LD->getBasePtr().getNode() != N) + return false; + VT = LD->getMemoryVT(); + AS = LD->getAddressSpace(); + } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) { if (ST->isIndexed() || ST->getBasePtr().getNode() != N) return false; VT = ST->getMemoryVT(); @@ -13651,38 +13668,64 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, VT.getTypeForEVT(*DAG.getContext()), AS); } -/// Try turning a load/store into a pre-indexed load/store when the base -/// pointer is an add or subtract and it has other uses besides the load/store. -/// After the transformation, the new indexed load/store has effectively folded -/// the add/subtract in and all of its other uses are redirected to the -/// new load/store. 
-bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { - if (Level < AfterLegalizeDAG) - return false; - - bool isLoad = true; - SDValue Ptr; - EVT VT; - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { +static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, + bool &IsLoad, bool &IsMasked, SDValue &Ptr, + const TargetLowering &TLI) { + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { if (LD->isIndexed()) return false; - VT = LD->getMemoryVT(); - if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) && - !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT)) + EVT VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT)) return false; Ptr = LD->getBasePtr(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { if (ST->isIndexed()) return false; - VT = ST->getMemoryVT(); - if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) && - !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT)) + EVT VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT)) return false; Ptr = ST->getBasePtr(); - isLoad = false; + IsLoad = false; + } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) { + if (LD->isIndexed()) + return false; + EVT VT = LD->getMemoryVT(); + if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) && + !TLI.isIndexedMaskedLoadLegal(Dec, VT)) + return false; + Ptr = LD->getBasePtr(); + IsMasked = true; + } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) { + if (ST->isIndexed()) + return false; + EVT VT = ST->getMemoryVT(); + if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) && + !TLI.isIndexedMaskedStoreLegal(Dec, VT)) + return false; + Ptr = ST->getBasePtr(); + IsLoad = false; + IsMasked = true; } else { return false; } + return true; +} + +/// Try turning a load/store into a pre-indexed load/store when the base +/// pointer is an add or subtract and it has other uses besides the load/store. +/// After the transformation, the new indexed load/store has effectively folded +/// the add/subtract in and all of its other uses are redirected to the +/// new load/store. +bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; + + bool IsLoad = true; + bool IsMasked = false; + SDValue Ptr; + if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked, + Ptr, TLI)) + return false; // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail // out. There is no reason to make this a preinc/predec. @@ -13724,8 +13767,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; // Check #2. - if (!isLoad) { - SDValue Val = cast<StoreSDNode>(N)->getValue(); + if (!IsLoad) { + SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue() + : cast<StoreSDNode>(N)->getValue(); // Would require a copy. 
if (Val == BasePtr) @@ -13801,18 +13845,26 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; SDValue Result; - if (isLoad) - Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM); - else - Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM); + if (!IsMasked) { + if (IsLoad) + Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM); + else + Result = + DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM); + } else { + if (IsLoad) + Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr, + Offset, AM); + else + Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr, + Offset, AM); + } ++PreIndexedNodes; ++NodesCombined; LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); - if (isLoad) { + if (IsLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); } else { @@ -13866,7 +13918,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // We can now generate the new expression. SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0)); - SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); + SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0); SDValue NewUse = DAG.getNode(Opcode, DL, @@ -13876,7 +13928,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { } // Replace the uses of Ptr with uses of the updated base value. - DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0)); + DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0)); deleteAndRecombine(Ptr.getNode()); AddToWorklist(Result.getNode()); @@ -13891,29 +13943,12 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { if (Level < AfterLegalizeDAG) return false; - bool isLoad = true; + bool IsLoad = true; + bool IsMasked = false; SDValue Ptr; - EVT VT; - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { - if (LD->isIndexed()) - return false; - VT = LD->getMemoryVT(); - if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) && - !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT)) - return false; - Ptr = LD->getBasePtr(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { - if (ST->isIndexed()) - return false; - VT = ST->getMemoryVT(); - if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) && - !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT)) - return false; - Ptr = ST->getBasePtr(); - isLoad = false; - } else { + if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked, + Ptr, TLI)) return false; - } if (Ptr.getNode()->hasOneUse()) return false; @@ -13949,7 +13984,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // If all the uses are load / store addresses, then don't do the // transformation. - if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){ + if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) { bool RealUse = false; for (SDNode *UseUse : Use->uses()) { if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) @@ -13975,18 +14010,24 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { Worklist.push_back(Op); if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) && !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) { - SDValue Result = isLoad - ? 
DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM) - : DAG.getIndexedStore(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM); + SDValue Result; + if (!IsMasked) + Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, + Offset, AM) + : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM); + else + Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM) + : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); - if (isLoad) { + if (IsLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); } else { @@ -13998,7 +14039,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // Replace the uses of Use with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), - Result.getValue(isLoad ? 1 : 0)); + Result.getValue(IsLoad ? 1 : 0)); deleteAndRecombine(Op); return true; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 56c13bb0753..9f8da60eb9a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -592,8 +592,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { SDLoc dl(N); SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), - N->getMask(), ExtPassThru, N->getMemoryVT(), - N->getMemOperand(), ISD::EXTLOAD); + N->getOffset(), N->getMask(), ExtPassThru, + N->getMemoryVT(), N->getMemOperand(), + N->getAddressingMode(), ISD::EXTLOAD); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -1485,11 +1486,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, SDLoc dl(N); bool TruncateStore = false; - if (OpNo == 3) { + if (OpNo == 4) { Mask = PromoteTargetBoolean(Mask, DataVT); // Update in place. 
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); - NewOps[3] = Mask; + NewOps[4] = Mask; return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } else { // Data operand assert(OpNo == 1 && "Unexpected operand for promotion"); @@ -1497,14 +1498,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, TruncateStore = true; } - return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask, - N->getMemoryVT(), N->getMemOperand(), + return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), + N->getOffset(), Mask, N->getMemoryVT(), + N->getMemOperand(), N->getAddressingMode(), TruncateStore, N->isCompressingStore()); } SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo) { - assert(OpNo == 2 && "Only know how to promote the mask!"); + assert(OpNo == 3 && "Only know how to promote the mask!"); EVT DataVT = N->getValueType(0); SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7bca3ea888e..9403b344ea7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1541,12 +1541,15 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi) { + assert(MLD->isUnindexed() && "Indexed masked load during type legalization!"); EVT LoVT, HiVT; SDLoc dl(MLD); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); SDValue Ch = MLD->getChain(); SDValue Ptr = MLD->getBasePtr(); + SDValue Offset = MLD->getOffset(); + assert(Offset.isUndef() && "Unexpected indexed masked load offset"); SDValue Mask = MLD->getMask(); SDValue PassThru = MLD->getPassThru(); unsigned Alignment = MLD->getOriginalAlignment(); @@ -1578,8 +1581,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); - Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, PassThruLo, LoMemVT, MMO, - ExtType, MLD->isExpandingLoad()); + Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT, + MMO, MLD->getAddressingMode(), ExtType, + MLD->isExpandingLoad()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, MLD->isExpandingLoad()); @@ -1590,8 +1594,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); - Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO, - ExtType, MLD->isExpandingLoad()); + Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT, + MMO, MLD->getAddressingMode(), ExtType, + MLD->isExpandingLoad()); // Build a factor node to remember that this load is independent of the // other one. 
@@ -2326,8 +2331,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo) { + assert(N->isUnindexed() && "Indexed masked store of vector?"); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); + SDValue Offset = N->getOffset(); + assert(Offset.isUndef() && "Unexpected indexed masked store offset"); SDValue Mask = N->getMask(); SDValue Data = N->getValue(); EVT MemoryVT = N->getMemoryVT(); @@ -2361,8 +2369,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, - N->isTruncatingStore(), + Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO, + N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, @@ -2374,8 +2382,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, - N->isTruncatingStore(), N->isCompressingStore()); + Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, + N->getAddressingMode(), N->isTruncatingStore(), + N->isCompressingStore()); // Build a factor node to remember that this store is independent of the // other one. @@ -3699,10 +3708,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { WidenVT.getVectorNumElements()); Mask = ModifyToType(Mask, WideMaskVT, true); - SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), - Mask, PassThru, N->getMemoryVT(), - N->getMemOperand(), ExtType, - N->isExpandingLoad()); + SDValue Res = DAG.getMaskedLoad( + WidenVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, + PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), + ExtType, N->isExpandingLoad()); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -4447,7 +4456,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { StVal.getValueType().getVectorNumElements() && "Mask and data vectors should have the same number of elements"); return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(), - Mask, MST->getMemoryVT(), MST->getMemOperand(), + MST->getOffset(), Mask, MST->getMemoryVT(), + MST->getMemOperand(), MST->getAddressingMode(), false, MST->isCompressingStore()); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f1b88d80f43..a20e43462f7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6975,16 +6975,22 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, } SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, - SDValue Ptr, SDValue Mask, SDValue PassThru, - EVT MemVT, MachineMemOperand *MMO, + SDValue Base, SDValue Offset, SDValue Mask, + SDValue PassThru, EVT MemVT, + MachineMemOperand *MMO, + ISD::MemIndexedMode AM, ISD::LoadExtType ExtTy, bool isExpanding) { - SDVTList VTs = getVTList(VT, MVT::Other); - SDValue Ops[] = { Chain, Ptr, Mask, PassThru }; + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && + "Unindexed masked load with an offset!"); + SDVTList VTs = Indexed ? getVTList(VT, Base.getValueType(), MVT::Other) + : getVTList(VT, MVT::Other); + SDValue Ops[] = {Chain, Base, Offset, Mask, PassThru}; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>( - dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO)); + dl.getIROrder(), VTs, AM, ExtTy, isExpanding, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -6992,7 +6998,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, return SDValue(E, 0); } auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, - ExtTy, isExpanding, MemVT, MMO); + AM, ExtTy, isExpanding, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -7002,27 +7008,45 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, return V; } +SDValue SelectionDAG::getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + MaskedLoadSDNode *LD = cast<MaskedLoadSDNode>(OrigLoad); + assert(LD->getOffset().isUndef() && "Masked load is already a indexed load!"); + return getMaskedLoad(OrigLoad.getValueType(), dl, LD->getChain(), Base, + Offset, LD->getMask(), LD->getPassThru(), + LD->getMemoryVT(), LD->getMemOperand(), AM, + LD->getExtensionType(), LD->isExpandingLoad()); +} + SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, - SDValue Val, SDValue Ptr, SDValue Mask, - EVT MemVT, MachineMemOperand *MMO, - bool IsTruncating, bool IsCompressing) { + SDValue Val, SDValue Base, SDValue Offset, + SDValue Mask, EVT MemVT, + MachineMemOperand *MMO, + ISD::MemIndexedMode AM, bool IsTruncating, + bool IsCompressing) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - SDVTList VTs = getVTList(MVT::Other); - SDValue Ops[] = { Chain, Val, Ptr, Mask }; + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && + "Unindexed masked store with an offset!"); 
+ SDVTList VTs = Indexed ? getVTList(Base.getValueType(), MVT::Other) + : getVTList(MVT::Other); + SDValue Ops[] = {Chain, Val, Base, Offset, Mask}; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>( - dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO)); + dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, - IsTruncating, IsCompressing, MemVT, MMO); + auto *N = + newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, + IsTruncating, IsCompressing, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -7032,6 +7056,17 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, return V; } +SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + MaskedStoreSDNode *ST = cast<MaskedStoreSDNode>(OrigStore); + assert(ST->getOffset().isUndef() && + "Masked store is already a indexed store!"); + return getMaskedStore(ST->getChain(), dl, ST->getValue(), Base, Offset, + ST->getMask(), ST->getMemoryVT(), ST->getMemOperand(), + AM, ST->isTruncatingStore(), ST->isCompressingStore()); +} + SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1ed0dc2c979..0aeb3c14aa3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4295,6 +4295,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(Src0Operand); SDValue Mask = getValue(MaskOperand); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); if (!Alignment) @@ -4311,9 +4312,9 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // vectors. 
VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo); - SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT, - MMO, false /* Truncating */, - IsCompressing); + SDValue StoreNode = + DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, + ISD::UNINDEXED, false /* Truncating */, IsCompressing); DAG.setRoot(StoreNode); setValue(&I, StoreNode); } @@ -4461,6 +4462,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(Src0Operand); SDValue Mask = getValue(MaskOperand); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); if (!Alignment) @@ -4491,8 +4493,9 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo, Ranges); - SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, - ISD::NON_EXTLOAD, IsExpanding); + SDValue Load = + DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO, + ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding); if (AddToChain) PendingLoads.push_back(Load.getValue(1)); setValue(&I, Load); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index bc10f762123..f863d987648 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -685,6 +685,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (doExt) OS << " from " << MLd->getMemoryVT().getEVTString(); + const char *AM = getIndexedModeName(MLd->getAddressingMode()); + if (*AM) + OS << ", " << AM; + if (MLd->isExpandingLoad()) OS << ", expanding"; @@ -696,6 +700,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (MSt->isTruncatingStore()) OS << ", trunc to " << MSt->getMemoryVT().getEVTString(); + const char *AM = getIndexedModeName(MSt->getAddressingMode()); + if (*AM) + OS << ", " << AM; + if (MSt->isCompressingStore()) OS << ", compressing"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index af7dc432eae..cc436fcc4f6 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -633,6 +633,8 @@ void TargetLoweringBase::initActions() { IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { setIndexedLoadAction(IM, VT, Expand); setIndexedStoreAction(IM, VT, Expand); + setIndexedMaskedLoadAction(IM, VT, Expand); + setIndexedMaskedStoreAction(IM, VT, Expand); } // Most backends expect to see the node which just returns the value loaded. diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 80cf31ff3d5..ec84c1efbaf 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -262,15 +262,17 @@ def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, // non-extending masked load fragment. def nonext_masked_load : PatFrag<(ops node:$ptr, node:$pred, node:$def), - (masked_ld node:$ptr, node:$pred, node:$def), [{ - return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD; + (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ + return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD && + cast<MaskedLoadSDNode>(N)->isUnindexed(); }]>; // sign extending masked load fragments. 
def asext_masked_load : PatFrag<(ops node:$ptr, node:$pred, node:$def), - (masked_ld node:$ptr, node:$pred, node:$def),[{ - return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD || - cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD; + (masked_ld node:$ptr, undef, node:$pred, node:$def),[{ + return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD || + cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD) && + cast<MaskedLoadSDNode>(N)->isUnindexed(); }]>; def asext_masked_load_i8 : PatFrag<(ops node:$ptr, node:$pred, node:$def), @@ -290,8 +292,9 @@ def asext_masked_load_i32 : // zero extending masked load fragments. def zext_masked_load : PatFrag<(ops node:$ptr, node:$pred, node:$def), - (masked_ld node:$ptr, node:$pred, node:$def), [{ - return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD; + (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ + return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD && + cast<MaskedLoadSDNode>(N)->isUnindexed(); }]>; def zext_masked_load_i8 : PatFrag<(ops node:$ptr, node:$pred, node:$def), @@ -312,14 +315,16 @@ def zext_masked_load_i32 : // non-truncating masked store fragment. def nontrunc_masked_store : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, node:$pred), [{ - return !cast<MaskedStoreSDNode>(N)->isTruncatingStore(); + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ + return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() && + cast<MaskedStoreSDNode>(N)->isUnindexed(); }]>; // truncating masked store fragments. def trunc_masked_store : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, node:$pred), [{ - return cast<MaskedStoreSDNode>(N)->isTruncatingStore(); + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ + return cast<MaskedStoreSDNode>(N)->isTruncatingStore() && + cast<MaskedStoreSDNode>(N)->isUnindexed(); }]>; def trunc_masked_store_i8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 46a2560e167..a6b334938e1 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1351,11 +1351,27 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm, unsigned Shift) { unsigned Opcode = Op->getOpcode(); - ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) - ? cast<LoadSDNode>(Op)->getAddressingMode() - : cast<StoreSDNode>(Op)->getAddressingMode(); + ISD::MemIndexedMode AM; + switch (Opcode) { + case ISD::LOAD: + AM = cast<LoadSDNode>(Op)->getAddressingMode(); + break; + case ISD::STORE: + AM = cast<StoreSDNode>(Op)->getAddressingMode(); + break; + case ISD::MLOAD: + AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode(); + break; + case ISD::MSTORE: + AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode(); + break; + default: + llvm_unreachable("Unexpected Opcode for Imm7Offset"); + } + int RHSC; - if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits. + // 7 bit constant, shifted by Shift. + if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) ? 
CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32) @@ -1625,58 +1641,93 @@ bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { } bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) { - LoadSDNode *LD = cast<LoadSDNode>(N); - ISD::MemIndexedMode AM = LD->getAddressingMode(); - if (AM == ISD::UNINDEXED) - return false; - EVT LoadedVT = LD->getMemoryVT(); - if (!LoadedVT.isVector()) - return false; - bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; - SDValue Offset; - bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + EVT LoadedVT; unsigned Opcode = 0; - unsigned Align = LD->getAlignment(); - bool IsLE = Subtarget->isLittle(); + bool isSExtLd, isPre; + unsigned Align; + ARMVCC::VPTCodes Pred; + SDValue PredReg; + SDValue Chain, Base, Offset; + + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM == ISD::UNINDEXED) + return false; + LoadedVT = LD->getMemoryVT(); + if (!LoadedVT.isVector()) + return false; + + Chain = LD->getChain(); + Base = LD->getBasePtr(); + Offset = LD->getOffset(); + Align = LD->getAlignment(); + isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; + isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + Pred = ARMVCC::None; + PredReg = CurDAG->getRegister(0, MVT::i32); + } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) { + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM == ISD::UNINDEXED) + return false; + LoadedVT = LD->getMemoryVT(); + if (!LoadedVT.isVector()) + return false; + Chain = LD->getChain(); + Base = LD->getBasePtr(); + Offset = LD->getOffset(); + Align = LD->getAlignment(); + isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; + isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + Pred = ARMVCC::Then; + PredReg = LD->getMask(); + } else + llvm_unreachable("Expected a Load or a Masked Load!"); + + // We allow LE non-masked loads to change the type (for example use a vldrb.8 + // as opposed to a vldrw.32). This can allow extra addressing modes or + // alignments for what is otherwise an equivalent instruction. + bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N); + + SDValue NewOffset; if (Align >= 2 && LoadedVT == MVT::v4i16 && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) { + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) { if (isSExtLd) Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post; else Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post; } else if (LoadedVT == MVT::v8i8 && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) { + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) { if (isSExtLd) Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post; else Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post; } else if (LoadedVT == MVT::v4i8 && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) { + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) { if (isSExtLd) Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post; else Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post; } else if (Align >= 4 && - (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2)) + (CanChangeType || LoadedVT == MVT::v4i32 || + LoadedVT == MVT::v4f32) && + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2)) Opcode = isPre ? 
ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post; else if (Align >= 2 && - (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) + (CanChangeType || LoadedVT == MVT::v8i16 || + LoadedVT == MVT::v8f16) && + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post; - else if ((IsLE || LoadedVT == MVT::v16i8) && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) + else if ((CanChangeType || LoadedVT == MVT::v16i8) && + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post; else return false; - SDValue Chain = LD->getChain(); - SDValue Base = LD->getBasePtr(); - SDValue Ops[] = {Base, Offset, - CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32), - CurDAG->getRegister(0, MVT::i32), Chain}; - SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0), + SDValue Ops[] = {Base, NewOffset, + CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg, + Chain}; + SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), N->getValueType(0), MVT::i32, MVT::Other, Ops); transferMemOperands(N, New); ReplaceUses(SDValue(N, 0), SDValue(New, 1)); @@ -3292,6 +3343,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) { // Other cases are autogenerated. break; } + case ISD::MLOAD: + if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) + return; + // Other cases are autogenerated. + break; case ARMISD::WLS: case ARMISD::LE: { SDValue Ops[] = { N->getOperand(1), diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index e359756b7bf..c153e786e2d 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -296,6 +296,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { setIndexedLoadAction(im, VT, Legal); setIndexedStoreAction(im, VT, Legal); + setIndexedMaskedLoadAction(im, VT, Legal); + setIndexedMaskedStoreAction(im, VT, Legal); } } @@ -322,6 +324,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { setIndexedLoadAction(im, VT, Legal); setIndexedStoreAction(im, VT, Legal); + setIndexedMaskedLoadAction(im, VT, Legal); + setIndexedMaskedStoreAction(im, VT, Legal); } if (HasMVEFP) { @@ -374,12 +378,12 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { // Pre and Post inc on these are legal, given the correct extends for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { - setIndexedLoadAction(im, MVT::v8i8, Legal); - setIndexedStoreAction(im, MVT::v8i8, Legal); - setIndexedLoadAction(im, MVT::v4i8, Legal); - setIndexedStoreAction(im, MVT::v4i8, Legal); - setIndexedLoadAction(im, MVT::v4i16, Legal); - setIndexedStoreAction(im, MVT::v4i16, Legal); + for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) { + setIndexedLoadAction(im, VT, Legal); + setIndexedStoreAction(im, VT, Legal); + setIndexedMaskedLoadAction(im, VT, Legal); + setIndexedMaskedStoreAction(im, VT, Legal); + } } // Predicate types @@ -9013,8 +9017,9 @@ static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) { SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT, DAG.getTargetConstant(0, dl, MVT::i32)); SDValue NewLoad = DAG.getMaskedLoad( - VT, dl, N->getChain(), N->getBasePtr(), Mask, ZeroVec, N->getMemoryVT(), - N->getMemOperand(), N->getExtensionType(), N->isExpandingLoad()); + VT, dl, 
N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec, + N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), + N->getExtensionType(), N->isExpandingLoad()); SDValue Combo = NewLoad; if (!PassThru.isUndef() && (PassThru.getOpcode() != ISD::BITCAST || @@ -15192,14 +15197,19 @@ static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, } static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align, - bool isSEXTLoad, bool isLE, SDValue &Base, - SDValue &Offset, bool &isInc, - SelectionDAG &DAG) { + bool isSEXTLoad, bool IsMasked, bool isLE, + SDValue &Base, SDValue &Offset, + bool &isInc, SelectionDAG &DAG) { if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) return false; if (!isa<ConstantSDNode>(Ptr->getOperand(1))) return false; + // We allow LE non-masked loads to change the type (for example use a vldrb.8 + // as opposed to a vldrw.32). This can allow extra addressing modes or + // alignments for what is otherwise an equivalent instruction. + bool CanChangeType = isLE && !IsMasked; + ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1)); int RHSC = (int)RHS->getZExtValue(); @@ -15218,7 +15228,7 @@ static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align, }; // Try to find a matching instruction based on s/zext, Alignment, Offset and - // (in BE) type. + // (in BE/masked) type. Base = Ptr->getOperand(0); if (VT == MVT::v4i16) { if (Align >= 2 && IsInRange(RHSC, 0x80, 2)) @@ -15226,13 +15236,15 @@ static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align, } else if (VT == MVT::v4i8 || VT == MVT::v8i8) { if (IsInRange(RHSC, 0x80, 1)) return true; - } else if (Align >= 4 && (isLE || VT == MVT::v4i32 || VT == MVT::v4f32) && + } else if (Align >= 4 && + (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) && IsInRange(RHSC, 0x80, 4)) return true; - else if (Align >= 2 && (isLE || VT == MVT::v8i16 || VT == MVT::v8f16) && + else if (Align >= 2 && + (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) && IsInRange(RHSC, 0x80, 2)) return true; - else if ((isLE || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1)) + else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1)) return true; return false; } @@ -15252,6 +15264,7 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue Ptr; unsigned Align; bool isSEXTLoad = false; + bool IsMasked = false; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); @@ -15261,6 +15274,17 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); Align = ST->getAlignment(); + } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) { + Ptr = LD->getBasePtr(); + VT = LD->getMemoryVT(); + Align = LD->getAlignment(); + isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; + IsMasked = true; + } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) { + Ptr = ST->getBasePtr(); + VT = ST->getMemoryVT(); + Align = ST->getAlignment(); + IsMasked = true; } else return false; @@ -15269,8 +15293,8 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (VT.isVector()) isLegal = Subtarget->hasMVEIntegerOps() && getMVEIndexedAddressParts(Ptr.getNode(), VT, Align, isSEXTLoad, - Subtarget->isLittle(), Base, Offset, - isInc, DAG); + IsMasked, Subtarget->isLittle(), Base, + Offset, isInc, DAG); else { if (Subtarget->isThumb2()) isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, @@ -15298,6 
+15322,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue Ptr; unsigned Align; bool isSEXTLoad = false, isNonExt; + bool IsMasked = false; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { VT = LD->getMemoryVT(); Ptr = LD->getBasePtr(); @@ -15309,6 +15334,19 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, Ptr = ST->getBasePtr(); Align = ST->getAlignment(); isNonExt = !ST->isTruncatingStore(); + } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) { + VT = LD->getMemoryVT(); + Ptr = LD->getBasePtr(); + Align = LD->getAlignment(); + isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; + isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD; + IsMasked = true; + } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) { + VT = ST->getMemoryVT(); + Ptr = ST->getBasePtr(); + Align = ST->getAlignment(); + isNonExt = !ST->isTruncatingStore(); + IsMasked = true; } else return false; @@ -15332,7 +15370,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, bool isLegal = false; if (VT.isVector()) isLegal = Subtarget->hasMVEIntegerOps() && - getMVEIndexedAddressParts(Op, VT, Align, isSEXTLoad, + getMVEIndexedAddressParts(Op, VT, Align, isSEXTLoad, IsMasked, Subtarget->isLittle(), Base, Offset, isInc, DAG); else { diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 429d0a1cf1b..dd8c032dae4 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -5332,6 +5332,10 @@ class MVE_vector_offset_store_typed<ValueType Ty, Instruction Opcode, PatFrag StoreKind, int shift> : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr), (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr)>; +class MVE_vector_offset_maskedstore_typed<ValueType Ty, Instruction Opcode, + PatFrag StoreKind, int shift> + : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr, VCCR:$pred), + (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr, (i32 1), VCCR:$pred)>; multiclass MVE_vector_offset_store<Instruction RegImmInst, PatFrag StoreKind, int shift> { @@ -5363,7 +5367,7 @@ def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), def maskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (masked_ld node:$ptr, node:$pred, node:$passthru), [{ + (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ auto *Ld = cast<MaskedLoadSDNode>(N); return Ld->getMemoryVT().getScalarType() == MVT::i8; }]>; @@ -5382,7 +5386,7 @@ def extmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD; }]>; def alignedmaskedload16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (masked_ld node:$ptr, node:$pred, node:$passthru), [{ + (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ auto *Ld = cast<MaskedLoadSDNode>(N); EVT ScalarVT = Ld->getMemoryVT().getScalarType(); return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && Ld->getAlignment() >= 2; @@ -5402,14 +5406,14 @@ def extmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD; }]>; def alignedmaskedload32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (masked_ld node:$ptr, node:$pred, node:$passthru), [{ + (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ auto *Ld = cast<MaskedLoadSDNode>(N); EVT ScalarVT = Ld->getMemoryVT().getScalarType(); 
return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && Ld->getAlignment() >= 4; }]>; def maskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, node:$pred), [{ + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; }]>; def truncatingmaskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), @@ -5417,7 +5421,7 @@ def truncatingmaskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), return cast<MaskedStoreSDNode>(N)->isTruncatingStore(); }]>; def maskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, node:$pred), [{ + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ auto *St = cast<MaskedStoreSDNode>(N); EVT ScalarVT = St->getMemoryVT().getScalarType(); return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; @@ -5428,12 +5432,41 @@ def truncatingmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), return cast<MaskedStoreSDNode>(N)->isTruncatingStore(); }]>; def maskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, node:$pred), [{ + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ auto *St = cast<MaskedStoreSDNode>(N); EVT ScalarVT = St->getMemoryVT().getScalarType(); return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4; }]>; + +def pre_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask), + (masked_st node:$val, node:$base, node:$offset, node:$mask), [{ + ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode(); + return AM == ISD::PRE_INC || AM == ISD::PRE_DEC; +}]>; +def post_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask), + (masked_st node:$val, node:$base, node:$offset, node:$mask), [{ + ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode(); + return AM == ISD::POST_INC || AM == ISD::POST_DEC; +}]>; +def aligned32_pre_maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + return cast<MaskedStoreSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned32_post_maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + return cast<MaskedStoreSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned16_pre_maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + return cast<MaskedStoreSDNode>(N)->getAlignment() >= 2; +}]>; +def aligned16_post_maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + return cast<MaskedStoreSDNode>(N)->getAlignment() >= 2; +}]>; + + let Predicates = [HasMVEInt, IsLE] in { // Stores defm : MVE_vector_store<MVE_VSTRBU8, byte_alignedstore, 0>; @@ -5515,19 +5548,26 @@ let Predicates = [HasMVEInt] in { def : MVE_vector_maskedstore_typed<v8f16, MVE_VSTRHU16, maskedstore16, 1>; def : MVE_vector_maskedstore_typed<v4i32, MVE_VSTRWU32, maskedstore32, 2>; def : MVE_vector_maskedstore_typed<v4f32, MVE_VSTRWU32, maskedstore32, 2>; - // Truncating stores - def : Pat<(truncatingmaskedstore8 (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), - (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; - def : 
Pat<(truncatingmaskedstore8 (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), - (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; - def : Pat<(truncatingmaskedstore16 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr, VCCR:$pred), - (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred)>; + + // Pre/Post inc masked stores + def : MVE_vector_offset_maskedstore_typed<v16i8, MVE_VSTRBU8_pre, pre_maskedstore, 0>; + def : MVE_vector_offset_maskedstore_typed<v16i8, MVE_VSTRBU8_post, post_maskedstore, 0>; + def : MVE_vector_offset_maskedstore_typed<v8i16, MVE_VSTRHU16_pre, aligned16_pre_maskedstore, 1>; + def : MVE_vector_offset_maskedstore_typed<v8i16, MVE_VSTRHU16_post, aligned16_post_maskedstore, 1>; + def : MVE_vector_offset_maskedstore_typed<v8f16, MVE_VSTRHU16_pre, aligned16_pre_maskedstore, 1>; + def : MVE_vector_offset_maskedstore_typed<v8f16, MVE_VSTRHU16_post, aligned16_post_maskedstore, 1>; + def : MVE_vector_offset_maskedstore_typed<v4i32, MVE_VSTRWU32_pre, aligned32_pre_maskedstore, 2>; + def : MVE_vector_offset_maskedstore_typed<v4i32, MVE_VSTRWU32_post, aligned32_post_maskedstore, 2>; + def : MVE_vector_offset_maskedstore_typed<v4f32, MVE_VSTRWU32_pre, aligned32_pre_maskedstore, 2>; + def : MVE_vector_offset_maskedstore_typed<v4f32, MVE_VSTRWU32_post, aligned32_post_maskedstore, 2>; + // Aligned masked loads def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8, maskedload8, 0>; def : MVE_vector_maskedload_typed<v8i16, MVE_VLDRHU16, alignedmaskedload16, 1>; def : MVE_vector_maskedload_typed<v8f16, MVE_VLDRHU16, alignedmaskedload16, 1>; def : MVE_vector_maskedload_typed<v4i32, MVE_VLDRWU32, alignedmaskedload32, 2>; def : MVE_vector_maskedload_typed<v4f32, MVE_VLDRWU32, alignedmaskedload32, 2>; + // Extending masked loads. 
def : Pat<(v8i16 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8i16 NEONimmAllZerosV))), @@ -5569,6 +5609,37 @@ let MinAlignment = 2 in { (pre_truncstvi16 node:$val, node:$base, node:$offset)>; } +def pre_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), + (masked_st node:$val, node:$base, node:$offset, node:$pred), [{ + ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode(); + return AM == ISD::PRE_INC || AM == ISD::PRE_DEC; +}]>; +def pre_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), + (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{ + return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def pre_truncmaskedstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), + (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{ + auto *St = cast<MaskedStoreSDNode>(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; +def post_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), + (masked_st node:$val, node:$base, node:$offset, node:$postd), [{ + ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode(); + return AM == ISD::POST_INC || AM == ISD::POST_DEC; +}]>; +def post_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), + (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{ + return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def post_truncmaskedstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), + (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{ + auto *St = cast<MaskedStoreSDNode>(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; + let Predicates = [HasMVEInt] in { def : Pat<(truncstorevi8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr), (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr)>; @@ -5590,6 +5661,27 @@ let Predicates = [HasMVEInt] in { (MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; def : Pat<(pre_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), (MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; + + def : Pat<(truncatingmaskedstore8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr, VCCR:$pred), + (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(truncatingmaskedstore8 (v4i32 MQPR:$val), taddrmode_imm7<0>:$addr, VCCR:$pred), + (MVE_VSTRB32 MQPR:$val, taddrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(truncatingmaskedstore16 (v4i32 MQPR:$val), taddrmode_imm7<1>:$addr, VCCR:$pred), + (MVE_VSTRH32 MQPR:$val, taddrmode_imm7<1>:$addr, (i32 1), VCCR:$pred)>; + + def : Pat<(post_truncmaskedstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr, VCCR:$pred), + (MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(post_truncmaskedstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr, VCCR:$pred), + (MVE_VSTRB32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(post_truncmaskedstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr, VCCR:$pred), + (MVE_VSTRH32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr, (i32 1), VCCR:$pred)>; + + def : 
Pat<(pre_truncmaskedstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr, VCCR:$pred), + (MVE_VSTRB16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(pre_truncmaskedstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr, VCCR:$pred), + (MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(pre_truncmaskedstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr, VCCR:$pred), + (MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr, (i32 1), VCCR:$pred)>; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c3861adf091..32072df268d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -24280,9 +24280,11 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements()); SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); + SDValue Offset = DAG.getUNDEF(VMask.getValueType()); - return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, MemVT, - MemIntr->getMemOperand(), true /* truncating */); + return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, Offset, VMask, + MemVT, MemIntr->getMemOperand(), ISD::UNINDEXED, + true /* truncating */); } case X86ISD::VTRUNCUS: case X86ISD::VTRUNCS: { @@ -27593,12 +27595,11 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget, if (PassThru.isUndef() || ISD::isBuildVectorAllZeros(PassThru.getNode())) return Op; - SDValue NewLoad = DAG.getMaskedLoad(VT, dl, N->getChain(), - N->getBasePtr(), Mask, - getZeroVector(VT, Subtarget, DAG, dl), - N->getMemoryVT(), N->getMemOperand(), - N->getExtensionType(), - N->isExpandingLoad()); + SDValue NewLoad = DAG.getMaskedLoad( + VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, + getZeroVector(VT, Subtarget, DAG, dl), N->getMemoryVT(), + N->getMemOperand(), N->getAddressingMode(), N->getExtensionType(), + N->isExpandingLoad()); // Emit a blend. 
SDValue Select = DAG.getNode(ISD::VSELECT, dl, MaskVT, Mask, NewLoad, PassThru); @@ -27632,11 +27633,10 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget, MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); - SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(), - N->getBasePtr(), Mask, PassThru, - N->getMemoryVT(), N->getMemOperand(), - N->getExtensionType(), - N->isExpandingLoad()); + SDValue NewLoad = DAG.getMaskedLoad( + WideDataVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, + PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), + N->getExtensionType(), N->isExpandingLoad()); SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewLoad.getValue(0), @@ -27682,7 +27682,8 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget, DataToStore = ExtendToType(DataToStore, WideDataVT, DAG); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(), - Mask, N->getMemoryVT(), N->getMemOperand(), + N->getOffset(), Mask, N->getMemoryVT(), + N->getMemOperand(), N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore()); } @@ -40453,6 +40454,7 @@ static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp, static SDValue reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { + assert(ML->isUnindexed() && "Unexpected indexed masked load!"); // TODO: This is not x86-specific, so it could be lifted to DAGCombiner. // However, some target hooks may need to be added to know when the transform // is profitable. Endianness would also have to be considered. @@ -40480,6 +40482,7 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG, static SDValue combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { + assert(ML->isUnindexed() && "Unexpected indexed masked load!"); if (!ISD::isBuildVectorOfConstantSDNodes(ML->getMask().getNode())) return SDValue(); @@ -40515,10 +40518,10 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG, // The new masked load has an undef pass-through operand. The select uses the // original pass-through operand. 
- SDValue NewML = DAG.getMaskedLoad(VT, DL, ML->getChain(), ML->getBasePtr(), - ML->getMask(), DAG.getUNDEF(VT), - ML->getMemoryVT(), ML->getMemOperand(), - ML->getExtensionType()); + SDValue NewML = DAG.getMaskedLoad( + VT, DL, ML->getChain(), ML->getBasePtr(), ML->getOffset(), ML->getMask(), + DAG.getUNDEF(VT), ML->getMemoryVT(), ML->getMemOperand(), + ML->getAddressingMode(), ML->getExtensionType()); SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML, ML->getPassThru()); @@ -40604,8 +40607,9 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG, TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), Mst->getMemoryVT())) { return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0), - Mst->getBasePtr(), Mask, - Mst->getMemoryVT(), Mst->getMemOperand(), true); + Mst->getBasePtr(), Mst->getOffset(), Mask, + Mst->getMemoryVT(), Mst->getMemOperand(), + Mst->getAddressingMode(), true); } return SDValue(); diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index de6f8a81dff..1a4f7e1e6bb 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -706,6 +706,10 @@ def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>; def X86GF2P8affineqb : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>; def X86GF2P8mulb : SDNode<"X86ISD::GF2P8MULB", SDTIntBinOp>; +def SDTX86MaskedStore: SDTypeProfile<0, 3, [ // masked store + SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2> +]>; + //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// @@ -1040,9 +1044,10 @@ def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec, INSERT_get_vinsert256_imm>; def masked_load : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_ld node:$src1, node:$src2, node:$src3), [{ + (masked_ld node:$src1, undef, node:$src2, node:$src3), [{ return !cast<MaskedLoadSDNode>(N)->isExpandingLoad() && - cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD; + cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD && + cast<MaskedLoadSDNode>(N)->isUnindexed(); }]>; def masked_load_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), @@ -1055,17 +1060,19 @@ def masked_load_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), }]>; def X86mExpandingLoad : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_ld node:$src1, node:$src2, node:$src3), [{ - return cast<MaskedLoadSDNode>(N)->isExpandingLoad(); + (masked_ld node:$src1, undef, node:$src2, node:$src3), [{ + return cast<MaskedLoadSDNode>(N)->isExpandingLoad() && + cast<MaskedLoadSDNode>(N)->isUnindexed(); }]>; // Masked store fragments. // X86mstore can't be implemented in core DAG files because some targets // do not support vector types (llvm-tblgen will fail). 
def masked_store : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_st node:$src1, node:$src2, node:$src3), [{ - return (!cast<MaskedStoreSDNode>(N)->isTruncatingStore()) && - (!cast<MaskedStoreSDNode>(N)->isCompressingStore()); + (masked_st node:$src1, node:$src2, undef, node:$src3), [{ + return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() && + !cast<MaskedStoreSDNode>(N)->isCompressingStore() && + cast<MaskedStoreSDNode>(N)->isUnindexed(); }]>; def masked_store_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), @@ -1078,16 +1085,18 @@ def masked_store_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), }]>; def X86mCompressingStore : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_st node:$src1, node:$src2, node:$src3), [{ - return cast<MaskedStoreSDNode>(N)->isCompressingStore(); + (masked_st node:$src1, node:$src2, undef, node:$src3), [{ + return cast<MaskedStoreSDNode>(N)->isCompressingStore() && + cast<MaskedStoreSDNode>(N)->isUnindexed(); }]>; // masked truncstore fragments // X86mtruncstore can't be implemented in core DAG files because some targets // doesn't support vector type ( llvm-tblgen will fail) def X86mtruncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_st node:$src1, node:$src2, node:$src3), [{ - return cast<MaskedStoreSDNode>(N)->isTruncatingStore(); + (masked_st node:$src1, node:$src2, undef, node:$src3), [{ + return cast<MaskedStoreSDNode>(N)->isTruncatingStore() && + cast<MaskedStoreSDNode>(N)->isUnindexed(); }]>; def masked_truncstorevi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3), @@ -1111,10 +1120,10 @@ def X86TruncSStore : SDNode<"X86ISD::VTRUNCSTORES", SDTStore, def X86TruncUSStore : SDNode<"X86ISD::VTRUNCSTOREUS", SDTStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; -def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES", SDTMaskedStore, +def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES", SDTX86MaskedStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; -def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTMaskedStore, +def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTX86MaskedStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def truncstore_s_vi8 : PatFrag<(ops node:$val, node:$ptr), |
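
For targets, the new indexed masked load/store actions default to Expand in TargetLoweringBase; a backend that can select these forms marks them Legal per vector type, as the ARM MVE hunks above do. A minimal sketch, assuming it runs inside a target's TargetLowering constructor with VT being one of the supported vector types:

    // Sketch only: mirrors the ARM MVE change; VT is a supported vector type.
    for (unsigned IM = (unsigned)ISD::PRE_INC;
         IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
      setIndexedMaskedLoadAction(IM, VT, Legal);
      setIndexedMaskedStoreAction(IM, VT, Legal);
    }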