diff options
| author | Ulrich Weigand <ulrich.weigand@de.ibm.com> | 2019-07-12 18:13:16 +0000 |
|---|---|---|
| committer | Ulrich Weigand <ulrich.weigand@de.ibm.com> | 2019-07-12 18:13:16 +0000 |
| commit | 0f0a8b77843e73212ab20cc9657b4db7c928abc0 (patch) | |
| tree | 7beb218c60e6587e1968245a19cb48ee8fdcf246 /llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | |
| parent | 223573c8ba446f8c8efe27187fdcaee0ffdbc747 (diff) | |
| download | bcm5719-llvm-0f0a8b77843e73212ab20cc9657b4db7c928abc0.tar.gz bcm5719-llvm-0f0a8b77843e73212ab20cc9657b4db7c928abc0.zip | |
[SystemZ] Add support for new cpu architecture - arch13
This patch series adds support for the next-generation arch13
CPU architecture to the SystemZ backend.
This includes:
- Basic support for the new processor and its features.
- Assembler/disassembler support for new instructions.
- CodeGen for new instructions, including new LLVM intrinsics.
- Scheduler description for the new processor.
- Detection of arch13 as host processor.
Note: No currently available Z system supports the arch13
architecture. Once new systems become available, the
official system name will be added as a supported -march name.
llvm-svn: 365932
Diffstat (limited to 'llvm/lib/Target/SystemZ/SystemZISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 243 |
1 files changed, 230 insertions, 13 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 56ec3a5b588..f5323a0ee06 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -252,6 +252,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); setOperationAction(ISD::CTLZ, MVT::i64, Legal); + // On arch13 we have native support for a 64-bit CTPOP. + if (Subtarget.hasMiscellaneousExtensions3()) { + setOperationAction(ISD::CTPOP, MVT::i32, Promote); + setOperationAction(ISD::CTPOP, MVT::i64, Legal); + } + // Give LowerOperation the chance to replace 64-bit ORs with subregs. setOperationAction(ISD::OR, MVT::i64, Custom); @@ -377,6 +383,17 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal); } + if (Subtarget.hasVectorEnhancements2()) { + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal); + } + // Handle floating-point types. 
for (unsigned I = MVT::FIRST_FP_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; @@ -576,6 +593,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::FP_ROUND); setTargetDAGCombine(ISD::FP_EXTEND); @@ -1809,6 +1827,20 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { CCValid = SystemZ::CCMASK_ANY; return true; + case Intrinsic::s390_vstrsb: + case Intrinsic::s390_vstrsh: + case Intrinsic::s390_vstrsf: + Opcode = SystemZISD::VSTRS_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vstrszb: + case Intrinsic::s390_vstrszh: + case Intrinsic::s390_vstrszf: + Opcode = SystemZISD::VSTRSZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + case Intrinsic::s390_vfcedbs: case Intrinsic::s390_vfcesbs: Opcode = SystemZISD::VFCMPES; @@ -4506,9 +4538,18 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, return GS.getNode(DAG, SDLoc(BVN)); } +bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const { + if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed()) + return true; + if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV) + return true; + return false; +} + // Combine GPR scalar values Elems into a vector of type VT. -static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, - SmallVectorImpl<SDValue> &Elems) { +SDValue +SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, + SmallVectorImpl<SDValue> &Elems) const { // See whether there is a single replicated value. SDValue Single; unsigned int NumElements = Elems.size(); @@ -4537,13 +4578,13 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, // we would need 2 instructions to replicate it: VLVGP followed by VREPx. 
// This is only a win if the single defined element is used more than once. // In other cases we're better off using a single VLVGx. - if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD)) + if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single))) return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single); // If all elements are loads, use VLREP/VLEs (below). bool AllLoads = true; for (auto Elem : Elems) - if (Elem.getOpcode() != ISD::LOAD || cast<LoadSDNode>(Elem)->isIndexed()) { + if (!isVectorElementLoad(Elem)) { AllLoads = false; break; } @@ -4615,8 +4656,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, std::map<const SDNode*, unsigned> UseCounts; SDNode *LoadMaxUses = nullptr; for (unsigned I = 0; I < NumElements; ++I) - if (Elems[I].getOpcode() == ISD::LOAD && - cast<LoadSDNode>(Elems[I])->isUnindexed()) { + if (isVectorElementLoad(Elems[I])) { SDNode *Ld = Elems[I].getNode(); UseCounts[Ld]++; if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld]) @@ -5152,6 +5192,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(VISTR_CC); OPCODE(VSTRC_CC); OPCODE(VSTRCZ_CC); + OPCODE(VSTRS_CC); + OPCODE(VSTRSZ_CC); OPCODE(TDC); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); @@ -5171,6 +5213,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(ATOMIC_CMP_SWAP_128); OPCODE(LRV); OPCODE(STRV); + OPCODE(VLER); + OPCODE(VSTER); OPCODE(PREFETCH); } return nullptr; @@ -5484,6 +5528,31 @@ SDValue SystemZTargetLowering::combineLOAD( return SDValue(N, 0); } +bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const { + if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) + return true; + if (Subtarget.hasVectorEnhancements2()) + if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64) + return true; + return false; +} + +static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) { + if (!VT.isVector() || !VT.isSimple() || + 
VT.getSizeInBits() != 128 || + VT.getScalarSizeInBits() % 8 != 0) + return false; + + unsigned NumElts = VT.getVectorNumElements(); + for (unsigned i = 0; i < NumElts; ++i) { + if (M[i] < 0) continue; // ignore UNDEF indices + if ((unsigned) M[i] != NumElts - 1 - i) + return false; + } + + return true; +} + SDValue SystemZTargetLowering::combineSTORE( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -5505,13 +5574,11 @@ SDValue SystemZTargetLowering::combineSTORE( SN->getMemOperand()); } } - // Combine STORE (BSWAP) into STRVH/STRV/STRVG + // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR if (!SN->isTruncatingStore() && Op1.getOpcode() == ISD::BSWAP && Op1.getNode()->hasOneUse() && - (Op1.getValueType() == MVT::i16 || - Op1.getValueType() == MVT::i32 || - Op1.getValueType() == MVT::i64)) { + canLoadStoreByteSwapped(Op1.getValueType())) { SDValue BSwapOp = Op1.getOperand(0); @@ -5526,15 +5593,97 @@ SDValue SystemZTargetLowering::combineSTORE( DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other), Ops, MemVT, SN->getMemOperand()); } + // Combine STORE (element-swap) into VSTER + if (!SN->isTruncatingStore() && + Op1.getOpcode() == ISD::VECTOR_SHUFFLE && + Op1.getNode()->hasOneUse() && + Subtarget.hasVectorEnhancements2()) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode()); + ArrayRef<int> ShuffleMask = SVN->getMask(); + if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) { + SDValue Ops[] = { + N->getOperand(0), Op1.getOperand(0), N->getOperand(2) + }; + + return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N), + DAG.getVTList(MVT::Other), + Ops, MemVT, SN->getMemOperand()); + } + } + + return SDValue(); +} + +SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + // Combine element-swap (LOAD) into VLER + if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && + N->getOperand(0).hasOneUse() && + 
Subtarget.hasVectorEnhancements2()) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + ArrayRef<int> ShuffleMask = SVN->getMask(); + if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) { + SDValue Load = N->getOperand(0); + LoadSDNode *LD = cast<LoadSDNode>(Load); + + // Create the element-swapping load. + SDValue Ops[] = { + LD->getChain(), // Chain + LD->getBasePtr() // Ptr + }; + SDValue ESLoad = + DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N), + DAG.getVTList(LD->getValueType(0), MVT::Other), + Ops, LD->getMemoryVT(), LD->getMemOperand()); + + // First, combine the VECTOR_SHUFFLE away. This makes the value produced + // by the load dead. + DCI.CombineTo(N, ESLoad); + + // Next, combine the load away, we give it a bogus result value but a real + // chain result. The result value is dead because the shuffle is dead. + DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1)); + + // Return N so it doesn't get rechecked! + return SDValue(N, 0); + } + } + return SDValue(); } SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT( SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; if (!Subtarget.hasVector()) return SDValue(); + // Look through bitcasts that retain the number of vector elements. + SDValue Op = N->getOperand(0); + if (Op.getOpcode() == ISD::BITCAST && + Op.getValueType().isVector() && + Op.getOperand(0).getValueType().isVector() && + Op.getValueType().getVectorNumElements() == + Op.getOperand(0).getValueType().getVectorNumElements()) + Op = Op.getOperand(0); + + // Pull BSWAP out of a vector extraction. 
+ if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) { + EVT VecVT = Op.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT, + Op.getOperand(0), N->getOperand(1)); + DCI.AddToWorklist(Op.getNode()); + Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op); + if (EltVT != N->getValueType(0)) { + DCI.AddToWorklist(Op.getNode()); + Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op); + } + return Op; + } + // Try to simplify a vector extraction. if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) { SDValue Op0 = N->getOperand(0); @@ -5660,11 +5809,10 @@ SDValue SystemZTargetLowering::combineFP_EXTEND( SDValue SystemZTargetLowering::combineBSWAP( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; - // Combine BSWAP (LOAD) into LRVH/LRV/LRVG + // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && N->getOperand(0).hasOneUse() && - (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 || - N->getValueType(0) == MVT::i64)) { + canLoadStoreByteSwapped(N->getValueType(0))) { SDValue Load = N->getOperand(0); LoadSDNode *LD = cast<LoadSDNode>(Load); @@ -5697,6 +5845,74 @@ SDValue SystemZTargetLowering::combineBSWAP( // Return N so it doesn't get rechecked! return SDValue(N, 0); } + + // Look through bitcasts that retain the number of vector elements. + SDValue Op = N->getOperand(0); + if (Op.getOpcode() == ISD::BITCAST && + Op.getValueType().isVector() && + Op.getOperand(0).getValueType().isVector() && + Op.getValueType().getVectorNumElements() == + Op.getOperand(0).getValueType().getVectorNumElements()) + Op = Op.getOperand(0); + + // Push BSWAP into a vector insertion if at least one side then simplifies. 
+ if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) { + SDValue Vec = Op.getOperand(0); + SDValue Elt = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + + if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) || + Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() || + DAG.isConstantIntBuildVectorOrConstantInt(Elt) || + Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() || + (canLoadStoreByteSwapped(N->getValueType(0)) && + ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) { + EVT VecVT = N->getValueType(0); + EVT EltVT = N->getValueType(0).getVectorElementType(); + if (VecVT != Vec.getValueType()) { + Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec); + DCI.AddToWorklist(Vec.getNode()); + } + if (EltVT != Elt.getValueType()) { + Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt); + DCI.AddToWorklist(Elt.getNode()); + } + Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec); + DCI.AddToWorklist(Vec.getNode()); + Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt); + DCI.AddToWorklist(Elt.getNode()); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT, + Vec, Elt, Idx); + } + } + + // Push BSWAP into a vector shuffle if at least one side then simplifies. 
+ ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op); + if (SV && Op.hasOneUse()) { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) || + Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() || + DAG.isConstantIntBuildVectorOrConstantInt(Op1) || + Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) { + EVT VecVT = N->getValueType(0); + if (VecVT != Op0.getValueType()) { + Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0); + DCI.AddToWorklist(Op0.getNode()); + } + if (VecVT != Op1.getValueType()) { + Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1); + DCI.AddToWorklist(Op1.getNode()); + } + Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0); + DCI.AddToWorklist(Op0.getNode()); + Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1); + DCI.AddToWorklist(Op1.getNode()); + return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask()); + } + } + return SDValue(); } @@ -5919,6 +6135,7 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI); case ISD::LOAD: return combineLOAD(N, DCI); case ISD::STORE: return combineSTORE(N, DCI); + case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI); case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI); case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); |

