47 files changed, 9009 insertions, 73 deletions
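Among the additions below are LLVM intrinsics for the new vector-enhancements-2 shift-double-by-bit instructions, bound to Clang builtins via the GCCBuiltin entries in the first hunk. As a rough usage sketch (the wrapper function, the vector typedef, and the exact builtin prototype are illustrative assumptions, not part of the patch):

  // Hypothetical C++ caller of the new arch13 builtin; compile with
  // something like: clang -target s390x-linux-gnu -march=arch13
  typedef __attribute__((vector_size(16))) unsigned char uv16qi;

  uv16qi shift_pair_left(uv16qi hi, uv16qi lo) {
    // Shift the 256-bit concatenation hi:lo left by 3 bits and keep the
    // high 128 bits; the bit count must be a compile-time constant
    // (note the ImmArg<2> on the intrinsic below).
    return __builtin_s390_vsld(hi, lo, 3);
  }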
diff --git a/llvm/include/llvm/IR/IntrinsicsSystemZ.td b/llvm/include/llvm/IR/IntrinsicsSystemZ.td index 68ac285b728..40d6ba17eaf 100644 --- a/llvm/include/llvm/IR/IntrinsicsSystemZ.td +++ b/llvm/include/llvm/IR/IntrinsicsSystemZ.td @@ -48,6 +48,9 @@ class SystemZTernaryConv<string name, LLVMType result, LLVMType arg> : GCCBuiltin<"__builtin_s390_" ## name>, Intrinsic<[result], [arg, arg, result], [IntrNoMem]>; +class SystemZTernaryConvCC<LLVMType result, LLVMType arg> + : Intrinsic<[result, llvm_i32_ty], [arg, arg, result], [IntrNoMem]>; + class SystemZTernary<string name, LLVMType type> : SystemZTernaryConv<name, type, type>; @@ -415,6 +418,24 @@ let TargetPrefix = "s390" in { def int_s390_vstrl : GCCBuiltin<"__builtin_s390_vstrl">, Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty], [IntrArgMemOnly, IntrWriteMem]>; + + // Instructions from the Vector Enhancements Facility 2 + def int_s390_vsld : GCCBuiltin<"__builtin_s390_vsld">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<2>]>; + + def int_s390_vsrd : GCCBuiltin<"__builtin_s390_vsrd">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<2>]>; + + def int_s390_vstrsb : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v16i8_ty>; + def int_s390_vstrsh : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v8i16_ty>; + def int_s390_vstrsf : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v4i32_ty>; + def int_s390_vstrszb : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v16i8_ty>; + def int_s390_vstrszh : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v8i16_ty>; + def int_s390_vstrszf : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v4i32_ty>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 947e9468c26..d491912bdc0 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -315,6 +315,8 @@ StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) { Pos += sizeof("machine = ") - 1; unsigned int Id; if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { + if (Id >= 8561 && HaveVectorSupport) + return "arch13"; if (Id >= 3906 && HaveVectorSupport) return "z14"; if (Id >= 2964 && HaveVectorSupport) diff --git a/llvm/lib/Target/SystemZ/SystemZFeatures.td b/llvm/lib/Target/SystemZ/SystemZFeatures.td index 2dc44edcc05..dae795e845b 100644 --- a/llvm/lib/Target/SystemZ/SystemZFeatures.td +++ b/llvm/lib/Target/SystemZ/SystemZFeatures.td @@ -241,6 +241,51 @@ def Arch12NewFeatures : SystemZFeatureList<[ //===----------------------------------------------------------------------===// // +// New features added in the Thirteenth Edition of the z/Architecture +// +//===----------------------------------------------------------------------===// + +def FeatureMiscellaneousExtensions3 : SystemZFeature< + "miscellaneous-extensions-3", "MiscellaneousExtensions3", + "Assume that the miscellaneous-extensions facility 3 is installed" +>; + +def FeatureMessageSecurityAssist9 : SystemZFeature< + "message-security-assist-extension9", "MessageSecurityAssist9", + "Assume that the message-security-assist extension facility 9 is installed" +>; + +def FeatureVectorEnhancements2 : SystemZFeature< + "vector-enhancements-2", "VectorEnhancements2", + "Assume that the vector enhancements facility 2 is installed" +>; + +def FeatureVectorPackedDecimalEnhancement : SystemZFeature< + "vector-packed-decimal-enhancement", "VectorPackedDecimalEnhancement", + "Assume that the vector packed 
decimal enhancement facility is installed" +>; + +def FeatureEnhancedSort : SystemZFeature< + "enhanced-sort", "EnhancedSort", + "Assume that the enhanced-sort facility is installed" +>; + +def FeatureDeflateConversion : SystemZFeature< + "deflate-conversion", "DeflateConversion", + "Assume that the deflate-conversion facility is installed" +>; + +def Arch13NewFeatures : SystemZFeatureList<[ + FeatureMiscellaneousExtensions3, + FeatureMessageSecurityAssist9, + FeatureVectorEnhancements2, + FeatureVectorPackedDecimalEnhancement, + FeatureEnhancedSort, + FeatureDeflateConversion +]>; + +//===----------------------------------------------------------------------===// +// // Cumulative supported and unsupported feature sets // //===----------------------------------------------------------------------===// @@ -255,9 +300,13 @@ def Arch11SupportedFeatures : SystemZFeatureAdd<Arch10SupportedFeatures.List, Arch11NewFeatures.List>; def Arch12SupportedFeatures : SystemZFeatureAdd<Arch11SupportedFeatures.List, Arch12NewFeatures.List>; +def Arch13SupportedFeatures + : SystemZFeatureAdd<Arch12SupportedFeatures.List, Arch13NewFeatures.List>; -def Arch12UnsupportedFeatures +def Arch13UnsupportedFeatures : SystemZFeatureList<[]>; +def Arch12UnsupportedFeatures + : SystemZFeatureAdd<Arch13UnsupportedFeatures.List, Arch13NewFeatures.List>; def Arch11UnsupportedFeatures : SystemZFeatureAdd<Arch12UnsupportedFeatures.List, Arch12NewFeatures.List>; def Arch10UnsupportedFeatures diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 4d5570166a5..9dc4512255c 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -1480,6 +1480,23 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) { Node->getOperand(0).getOpcode() != ISD::Constant) if (auto *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) { uint64_t Val = Op1->getZExtValue(); + // Don't split the operation if we can match one of the combined + // logical operations provided by miscellaneous-extensions-3. + if (Subtarget->hasMiscellaneousExtensions3()) { + unsigned ChildOpcode = Node->getOperand(0).getOpcode(); + // Check whether this expression matches NAND/NOR/NXOR. + if (Val == (uint64_t)-1 && Opcode == ISD::XOR) + if (ChildOpcode == ISD::AND || ChildOpcode == ISD::OR || + ChildOpcode == ISD::XOR) + break; + // Check whether this expression matches OR-with-complement. + if (Opcode == ISD::OR && ChildOpcode == ISD::XOR) { + auto Op0 = Node->getOperand(0); + if (auto *Op0Op1 = dyn_cast<ConstantSDNode>(Op0->getOperand(1))) + if (Op0Op1->getZExtValue() == (uint64_t)-1) + break; + } + } if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) { splitLargeImmediate(Opcode, Node, Node->getOperand(0), Val - uint32_t(Val), uint32_t(Val)); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 56ec3a5b588..f5323a0ee06 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -252,6 +252,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); setOperationAction(ISD::CTLZ, MVT::i64, Legal); + // On arch13 we have native support for a 64-bit CTPOP. 
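A minimal sketch of what the two miscellaneous-extensions-3 changes in this region buy (function names are illustrative; the mnemonics come from the NNGRK/NOGRK/OCGRK and POPCNT patterns added further down in this patch):

  #include <cstdint>

  // The ISelDAGToDAG guard above keeps these DAG shapes intact so the
  // new combined-logical patterns can match them as single instructions.
  uint64_t f_nand(uint64_t a, uint64_t b) { return ~(a & b); } // NNGRK
  uint64_t f_nor (uint64_t a, uint64_t b) { return ~(a | b); } // NOGRK
  uint64_t f_orc (uint64_t a, uint64_t b) { return a | ~b;   } // OCGRK

  // And the legality change below should turn a full 64-bit popcount
  // into one POPCNT with M3=8 rather than the older byte-wise sequence.
  int popcount64(uint64_t x) { return __builtin_popcountll(x); }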
+ if (Subtarget.hasMiscellaneousExtensions3()) { + setOperationAction(ISD::CTPOP, MVT::i32, Promote); + setOperationAction(ISD::CTPOP, MVT::i64, Legal); + } + // Give LowerOperation the chance to replace 64-bit ORs with subregs. setOperationAction(ISD::OR, MVT::i64, Custom); @@ -377,6 +383,17 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal); } + if (Subtarget.hasVectorEnhancements2()) { + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal); + } + // Handle floating-point types. for (unsigned I = MVT::FIRST_FP_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; @@ -576,6 +593,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::FP_ROUND); setTargetDAGCombine(ISD::FP_EXTEND); @@ -1809,6 +1827,20 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { CCValid = SystemZ::CCMASK_ANY; return true; + case Intrinsic::s390_vstrsb: + case Intrinsic::s390_vstrsh: + case Intrinsic::s390_vstrsf: + Opcode = SystemZISD::VSTRS_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + + case Intrinsic::s390_vstrszb: + case Intrinsic::s390_vstrszh: + case Intrinsic::s390_vstrszf: + Opcode = SystemZISD::VSTRSZ_CC; + CCValid = SystemZ::CCMASK_ANY; + return true; + case Intrinsic::s390_vfcedbs: case Intrinsic::s390_vfcesbs: Opcode = SystemZISD::VFCMPES; @@ -4506,9 +4538,18 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, return GS.getNode(DAG, SDLoc(BVN)); } +bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const { + if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed()) + return true; + if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV) + return true; + return false; +} + // Combine GPR scalar values Elems into a vector of type VT. -static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, - SmallVectorImpl<SDValue> &Elems) { +SDValue +SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, + SmallVectorImpl<SDValue> &Elems) const { // See whether there is a single replicated value. SDValue Single; unsigned int NumElements = Elems.size(); @@ -4537,13 +4578,13 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, // we would need 2 instructions to replicate it: VLVGP followed by VREPx. // This is only a win if the single defined element is used more than once. // In other cases we're better off using a single VLVGx. - if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD)) + if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single))) return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single); // If all elements are loads, use VLREP/VLEs (below). 
bool AllLoads = true; for (auto Elem : Elems) - if (Elem.getOpcode() != ISD::LOAD || cast<LoadSDNode>(Elem)->isIndexed()) { + if (!isVectorElementLoad(Elem)) { AllLoads = false; break; } @@ -4615,8 +4656,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, std::map<const SDNode*, unsigned> UseCounts; SDNode *LoadMaxUses = nullptr; for (unsigned I = 0; I < NumElements; ++I) - if (Elems[I].getOpcode() == ISD::LOAD && - cast<LoadSDNode>(Elems[I])->isUnindexed()) { + if (isVectorElementLoad(Elems[I])) { SDNode *Ld = Elems[I].getNode(); UseCounts[Ld]++; if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld]) @@ -5152,6 +5192,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(VISTR_CC); OPCODE(VSTRC_CC); OPCODE(VSTRCZ_CC); + OPCODE(VSTRS_CC); + OPCODE(VSTRSZ_CC); OPCODE(TDC); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); @@ -5171,6 +5213,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(ATOMIC_CMP_SWAP_128); OPCODE(LRV); OPCODE(STRV); + OPCODE(VLER); + OPCODE(VSTER); OPCODE(PREFETCH); } return nullptr; @@ -5484,6 +5528,31 @@ SDValue SystemZTargetLowering::combineLOAD( return SDValue(N, 0); } +bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const { + if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) + return true; + if (Subtarget.hasVectorEnhancements2()) + if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64) + return true; + return false; +} + +static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) { + if (!VT.isVector() || !VT.isSimple() || + VT.getSizeInBits() != 128 || + VT.getScalarSizeInBits() % 8 != 0) + return false; + + unsigned NumElts = VT.getVectorNumElements(); + for (unsigned i = 0; i < NumElts; ++i) { + if (M[i] < 0) continue; // ignore UNDEF indices + if ((unsigned) M[i] != NumElts - 1 - i) + return false; + } + + return true; +} + SDValue SystemZTargetLowering::combineSTORE( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -5505,13 +5574,11 @@ SDValue SystemZTargetLowering::combineSTORE( SN->getMemOperand()); } } - // Combine STORE (BSWAP) into STRVH/STRV/STRVG + // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR if (!SN->isTruncatingStore() && Op1.getOpcode() == ISD::BSWAP && Op1.getNode()->hasOneUse() && - (Op1.getValueType() == MVT::i16 || - Op1.getValueType() == MVT::i32 || - Op1.getValueType() == MVT::i64)) { + canLoadStoreByteSwapped(Op1.getValueType())) { SDValue BSwapOp = Op1.getOperand(0); @@ -5526,15 +5593,97 @@ SDValue SystemZTargetLowering::combineSTORE( DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other), Ops, MemVT, SN->getMemOperand()); } + // Combine STORE (element-swap) into VSTER + if (!SN->isTruncatingStore() && + Op1.getOpcode() == ISD::VECTOR_SHUFFLE && + Op1.getNode()->hasOneUse() && + Subtarget.hasVectorEnhancements2()) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode()); + ArrayRef<int> ShuffleMask = SVN->getMask(); + if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) { + SDValue Ops[] = { + N->getOperand(0), Op1.getOperand(0), N->getOperand(2) + }; + + return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N), + DAG.getVTList(MVT::Other), + Ops, MemVT, SN->getMemOperand()); + } + } + + return SDValue(); +} + +SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + // Combine element-swap (LOAD) into VLER + if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 
+ N->getOperand(0).hasOneUse() && + Subtarget.hasVectorEnhancements2()) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + ArrayRef<int> ShuffleMask = SVN->getMask(); + if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) { + SDValue Load = N->getOperand(0); + LoadSDNode *LD = cast<LoadSDNode>(Load); + + // Create the element-swapping load. + SDValue Ops[] = { + LD->getChain(), // Chain + LD->getBasePtr() // Ptr + }; + SDValue ESLoad = + DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N), + DAG.getVTList(LD->getValueType(0), MVT::Other), + Ops, LD->getMemoryVT(), LD->getMemOperand()); + + // First, combine the VECTOR_SHUFFLE away. This makes the value produced + // by the load dead. + DCI.CombineTo(N, ESLoad); + + // Next, combine the load away, we give it a bogus result value but a real + // chain result. The result value is dead because the shuffle is dead. + DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1)); + + // Return N so it doesn't get rechecked! + return SDValue(N, 0); + } + } + return SDValue(); } SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT( SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; if (!Subtarget.hasVector()) return SDValue(); + // Look through bitcasts that retain the number of vector elements. + SDValue Op = N->getOperand(0); + if (Op.getOpcode() == ISD::BITCAST && + Op.getValueType().isVector() && + Op.getOperand(0).getValueType().isVector() && + Op.getValueType().getVectorNumElements() == + Op.getOperand(0).getValueType().getVectorNumElements()) + Op = Op.getOperand(0); + + // Pull BSWAP out of a vector extraction. + if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) { + EVT VecVT = Op.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT, + Op.getOperand(0), N->getOperand(1)); + DCI.AddToWorklist(Op.getNode()); + Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op); + if (EltVT != N->getValueType(0)) { + DCI.AddToWorklist(Op.getNode()); + Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op); + } + return Op; + } + // Try to simplify a vector extraction. if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) { SDValue Op0 = N->getOperand(0); @@ -5660,11 +5809,10 @@ SDValue SystemZTargetLowering::combineFP_EXTEND( SDValue SystemZTargetLowering::combineBSWAP( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; - // Combine BSWAP (LOAD) into LRVH/LRV/LRVG + // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && N->getOperand(0).hasOneUse() && - (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 || - N->getValueType(0) == MVT::i64)) { + canLoadStoreByteSwapped(N->getValueType(0))) { SDValue Load = N->getOperand(0); LoadSDNode *LD = cast<LoadSDNode>(Load); @@ -5697,6 +5845,74 @@ SDValue SystemZTargetLowering::combineBSWAP( // Return N so it doesn't get rechecked! return SDValue(N, 0); } + + // Look through bitcasts that retain the number of vector elements. + SDValue Op = N->getOperand(0); + if (Op.getOpcode() == ISD::BITCAST && + Op.getValueType().isVector() && + Op.getOperand(0).getValueType().isVector() && + Op.getValueType().getVectorNumElements() == + Op.getOperand(0).getValueType().getVectorNumElements()) + Op = Op.getOperand(0); + + // Push BSWAP into a vector insertion if at least one side then simplifies. 
+ if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) { + SDValue Vec = Op.getOperand(0); + SDValue Elt = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + + if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) || + Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() || + DAG.isConstantIntBuildVectorOrConstantInt(Elt) || + Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() || + (canLoadStoreByteSwapped(N->getValueType(0)) && + ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) { + EVT VecVT = N->getValueType(0); + EVT EltVT = N->getValueType(0).getVectorElementType(); + if (VecVT != Vec.getValueType()) { + Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec); + DCI.AddToWorklist(Vec.getNode()); + } + if (EltVT != Elt.getValueType()) { + Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt); + DCI.AddToWorklist(Elt.getNode()); + } + Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec); + DCI.AddToWorklist(Vec.getNode()); + Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt); + DCI.AddToWorklist(Elt.getNode()); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT, + Vec, Elt, Idx); + } + } + + // Push BSWAP into a vector shuffle if at least one side then simplifies. + ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op); + if (SV && Op.hasOneUse()) { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) || + Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() || + DAG.isConstantIntBuildVectorOrConstantInt(Op1) || + Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) { + EVT VecVT = N->getValueType(0); + if (VecVT != Op0.getValueType()) { + Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0); + DCI.AddToWorklist(Op0.getNode()); + } + if (VecVT != Op1.getValueType()) { + Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1); + DCI.AddToWorklist(Op1.getNode()); + } + Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0); + DCI.AddToWorklist(Op0.getNode()); + Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1); + DCI.AddToWorklist(Op1.getNode()); + return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask()); + } + } + return SDValue(); } @@ -5919,6 +6135,7 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI); case ISD::LOAD: return combineLOAD(N, DCI); case ISD::STORE: return combineSTORE(N, DCI); + case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI); case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI); case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 75f653340e6..23cdcc72bc4 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -281,6 +281,8 @@ enum NodeType : unsigned { VISTR_CC, VSTRC_CC, VSTRCZ_CC, + VSTRS_CC, + VSTRSZ_CC, // Test Data Class. // @@ -340,6 +342,9 @@ enum NodeType : unsigned { // Byte swapping load/store. Same operands as regular load/store. LRV, STRV, + // Element swapping load/store. Same operands as regular load/store. + VLER, VSTER, + // Prefetch from the second operand using the 4-bit control code in // the first operand. The code is 1 for a load prefetch and 2 for // a store prefetch. 
@@ -571,6 +576,9 @@ private: SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + bool isVectorElementLoad(SDValue Op) const; + SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, + SmallVectorImpl<SDValue> &Elems) const; SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; @@ -590,8 +598,10 @@ private: SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const; + bool canLoadStoreByteSwapped(EVT VT) const; SDValue combineLOAD(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineVECTOR_SHUFFLE(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index 1075861ac89..2a1d14de3dd 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -1414,13 +1414,15 @@ class InstVRRi<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> bits<4> R1; bits<5> V2; bits<4> M3; + bits<4> M4; let Inst{47-40} = op{15-8}; let Inst{39-36} = R1; let Inst{35-32} = V2{3-0}; let Inst{31-24} = 0; let Inst{23-20} = M3; - let Inst{19-12} = 0; + let Inst{19-16} = M4; + let Inst{15-12} = 0; let Inst{11} = 0; let Inst{10} = V2{4}; let Inst{9-8} = 0; @@ -2489,12 +2491,18 @@ class StoreVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator, TypedReg tr, bits<5> bytes, bits<4> type = 0> : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2), mnemonic#"\t$V1, $XBD2", - [(set (tr.vt tr.op:$V1), (operator bdxaddr12only:$XBD2))]> { + [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2)]> { let M3 = type; let mayStore = 1; let AccessBytes = bytes; } +class StoreVRXGeneric<string mnemonic, bits<16> opcode> + : InstVRX<opcode, (outs), (ins VR128:$V1, bdxaddr12only:$XBD2, imm32zx4:$M3), + mnemonic#"\t$V1, $XBD2, $M3", []> { + let mayStore = 1; +} + multiclass StoreVRXAlign<string mnemonic, bits<16> opcode> { let mayStore = 1, AccessBytes = 16 in { def Align : InstVRX<opcode, (outs), @@ -3151,6 +3159,11 @@ class BinaryRRFb<string mnemonic, bits<16> opcode, SDPatternOperator operator, let M4 = 0; } +class BinaryRRFc<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRFc<opcode, (outs cls1:$R1), (ins cls2:$R2, imm32zx4:$M3), + mnemonic#"\t$R1, $R2, $M3", []>; + class BinaryMemRRFc<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2, Immediate imm> : InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src, imm:$M3), @@ -3218,6 +3231,41 @@ multiclass CondBinaryRRFPair<string mnemonic, bits<16> opcode, def Asm : AsmCondBinaryRRF<mnemonic, opcode, cls1, cls2>; } +class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2, RegisterOperand cls3> + : InstRRFa<opcode, (outs cls1:$R1), + (ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4), + mnemonic#"$M4\t$R1, 
$R2, $R3", + [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3, + cond4:$valid, cond4:$M4))]> { + let CCMaskLast = 1; +} + +// Like CondBinaryRRFa, but used for the raw assembly form. The condition-code +// mask is the third operand rather than being part of the mnemonic. +class AsmCondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2, RegisterOperand cls3> + : InstRRFa<opcode, (outs cls1:$R1), (ins cls3:$R3, cls2:$R2, imm32zx4:$M4), + mnemonic#"\t$R1, $R2, $R3, $M4", []>; + +// Like CondBinaryRRFa, but with a fixed CC mask. +class FixedCondBinaryRRFa<CondVariant V, string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFa<opcode, (outs cls1:$R1), (ins cls3:$R3, cls2:$R2), + mnemonic#V.suffix#"\t$R1, $R2, $R3", []> { + let isAsmParserOnly = V.alternate; + let M4 = V.ccmask; +} + +multiclass CondBinaryRRFaPair<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> { + let isCodeGenOnly = 1 in + def "" : CondBinaryRRFa<mnemonic, opcode, cls1, cls2, cls3>; + def Asm : AsmCondBinaryRRFa<mnemonic, opcode, cls1, cls2, cls3>; +} + class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2), @@ -3612,7 +3660,9 @@ class BinaryVRRf<string mnemonic, bits<16> opcode, SDPatternOperator operator, class BinaryVRRi<string mnemonic, bits<16> opcode, RegisterOperand cls> : InstVRRi<opcode, (outs cls:$R1), (ins VR128:$V2, imm32zx4:$M3), - mnemonic#"\t$R1, $V2, $M3", []>; + mnemonic#"\t$R1, $V2, $M3", []> { + let M4 = 0; +} class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, bits<4> type> @@ -3990,6 +4040,17 @@ class SideEffectTernaryRRFa<string mnemonic, bits<16> opcode, let M4 = 0; } +class SideEffectTernaryMemMemRRFa<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFa<opcode, (outs cls1:$R1, cls2:$R2), + (ins cls1:$R1src, cls2:$R2src, cls3:$R3), + mnemonic#"\t$R1, $R2, $R3", []> { + let Constraints = "$R1 = $R1src, $R2 = $R2src"; + let DisableEncoding = "$R1src, $R2src"; + let M4 = 0; +} + class SideEffectTernaryRRFb<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2, RegisterOperand cls3> @@ -4278,7 +4339,7 @@ class TernaryVRRcFloatGeneric<string mnemonic, bits<16> opcode> mnemonic#"\t$V1, $V2, $V3, $M4, $M5, $M6", []>; class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator, - TypedReg tr1, TypedReg tr2, bits<4> type = 0> + TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m6 = 0> : InstVRRd<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4), mnemonic#"\t$V1, $V2, $V3, $V4", @@ -4286,7 +4347,7 @@ class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator, (tr2.vt tr2.op:$V3), (tr1.vt tr1.op:$V4)))]> { let M5 = type; - let M6 = 0; + let M6 = m6; } class TernaryVRRdGeneric<string mnemonic, bits<16> opcode> @@ -4296,6 +4357,34 @@ class TernaryVRRdGeneric<string mnemonic, bits<16> opcode> let M6 = 0; } +// Ternary operation where the assembler mnemonic has an extra operand to +// optionally allow specifiying arbitrary M6 values. 
+multiclass TernaryExtraVRRd<string mnemonic, bits<16> opcode, + SDPatternOperator operator, + TypedReg tr1, TypedReg tr2, bits<4> type> { + let M5 = type, Defs = [CC] in + def "" : InstVRRd<opcode, (outs tr1.op:$V1), + (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4, imm32zx4:$M6), + mnemonic#"\t$V1, $V2, $V3, $V4, $M6", []>; + def : Pat<(operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3), + (tr1.vt tr1.op:$V4)), + (!cast<Instruction>(NAME) tr2.op:$V2, tr2.op:$V3, tr1.op:$V4, 0)>; + def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4", + (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, + tr2.op:$V3, tr1.op:$V4, 0)>; +} + +multiclass TernaryExtraVRRdGeneric<string mnemonic, bits<16> opcode> { + let Defs = [CC] in + def "" : InstVRRd<opcode, (outs VR128:$V1), + (ins VR128:$V2, VR128:$V3, VR128:$V4, + imm32zx4:$M5, imm32zx4:$M6), + mnemonic#"\t$V1, $V2, $V3, $V4, $M5, $M6", []>; + def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5", + (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3, + VR128:$V4, imm32zx4:$M5, 0)>; +} + class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator, TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0> : InstVRRe<opcode, (outs tr1.op:$V1), @@ -4326,6 +4415,11 @@ class TernaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator, let M4 = type; } +class TernaryVRRi<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstVRRi<opcode, (outs cls:$R1), (ins VR128:$V2, + imm32zx4:$M3, imm32zx4:$M4), + mnemonic#"\t$R1, $V2, $M3, $M4", []>; + class TernaryVRSbGeneric<string mnemonic, bits<16> opcode> : InstVRSb<opcode, (outs VR128:$V1), (ins VR128:$V1src, GR64:$R3, shift12only:$BD2, imm32zx4:$M4), @@ -4705,6 +4799,17 @@ class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2> let CCMaskLast = 1; } +// Like CondBinaryRRFa, but expanded after RA depending on the choice of +// register. +class CondBinaryRRFaPseudo<RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : Pseudo<(outs cls1:$R1), + (ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4), + [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3, + cond4:$valid, cond4:$M4))]> { + let CCMaskLast = 1; +} + // Like CondBinaryRIE, but expanded after RA depending on the choice of // register. class CondBinaryRIEPseudo<RegisterOperand cls, Immediate imm> diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 8b3428a840f..b9b6e630435 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -223,6 +223,65 @@ void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, // correctly. This change is defered to the SystemZExpandPseudo pass. } +// MI is a select pseudo instruction. Replace it with LowOpcode if source +// and destination are all low GR32s and HighOpcode if source and destination +// are all high GR32s. Otherwise, use the two-operand MixedOpcode. +void SystemZInstrInfo::expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode, + unsigned HighOpcode, + unsigned MixedOpcode) const { + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned Src1Reg = MI.getOperand(1).getReg(); + unsigned Src2Reg = MI.getOperand(2).getReg(); + bool DestIsHigh = isHighReg(DestReg); + bool Src1IsHigh = isHighReg(Src1Reg); + bool Src2IsHigh = isHighReg(Src2Reg); + + // If sources and destination aren't all high or all low, we may be able to + // simplify the operation by moving one of the sources to the destination + // first. 
But only if this doesn't clobber the other source. + if (DestReg != Src1Reg && DestReg != Src2Reg) { + if (DestIsHigh != Src1IsHigh) { + emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src1Reg, + SystemZ::LR, 32, MI.getOperand(1).isKill(), + MI.getOperand(1).isUndef()); + MI.getOperand(1).setReg(DestReg); + Src1Reg = DestReg; + Src1IsHigh = DestIsHigh; + } else if (DestIsHigh != Src2IsHigh) { + emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src2Reg, + SystemZ::LR, 32, MI.getOperand(2).isKill(), + MI.getOperand(2).isUndef()); + MI.getOperand(2).setReg(DestReg); + Src2Reg = DestReg; + Src2IsHigh = DestIsHigh; + } + } + + // If the destination (now) matches one source, prefer this to be first. + if (DestReg != Src1Reg && DestReg == Src2Reg) { + commuteInstruction(MI, false, 1, 2); + std::swap(Src1Reg, Src2Reg); + std::swap(Src1IsHigh, Src2IsHigh); + } + + if (!DestIsHigh && !Src1IsHigh && !Src2IsHigh) + MI.setDesc(get(LowOpcode)); + else if (DestIsHigh && Src1IsHigh && Src2IsHigh) + MI.setDesc(get(HighOpcode)); + else { + // Given the simplification above, we must already have a two-operand case. + assert (DestReg == Src1Reg); + MI.setDesc(get(MixedOpcode)); + MI.tieOperands(0, 1); + LOCRMuxJumps++; + } + + // If we were unable to implement the pseudo with a single instruction, we + // need to convert it back into a branch sequence. This cannot be done here + // since the caller of expandPostRAPseudo does not handle changes to the CFG + // correctly. This change is deferred to the SystemZExpandPseudo pass. +} + // MI is an RR-style pseudo instruction that zero-extends the low Size bits // of one GRX32 into another. Replace it with LowOpcode if both operands // are low registers, otherwise use RISB[LH]G. @@ -312,6 +371,10 @@ MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI, }; switch (MI.getOpcode()) { + case SystemZ::SELRMux: + case SystemZ::SELFHR: + case SystemZ::SELR: + case SystemZ::SELGR: case SystemZ::LOCRMux: case SystemZ::LOCFHR: case SystemZ::LOCR: @@ -606,7 +669,9 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB, unsigned Opc; if (SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) { - if (STI.hasLoadStoreOnCond2()) + if (STI.hasMiscellaneousExtensions3()) + Opc = SystemZ::SELRMux; + else if (STI.hasLoadStoreOnCond2()) Opc = SystemZ::LOCRMux; else { Opc = SystemZ::LOCR; @@ -618,9 +683,12 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB, TrueReg = TReg; FalseReg = FReg; } - } else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC)) - Opc = SystemZ::LOCGR; - else + } else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC)) { + if (STI.hasMiscellaneousExtensions3()) + Opc = SystemZ::SELGR; + else + Opc = SystemZ::LOCGR; + } else llvm_unreachable("Invalid register class"); BuildMI(MBB, I, DL, get(Opc), DstReg) @@ -643,7 +711,11 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned NewUseOpc; unsigned UseIdx; int CommuteIdx = -1; + bool TieOps = false; switch (UseOpc) { + case SystemZ::SELRMux: + TieOps = true; + /* fall through */ case SystemZ::LOCRMux: if (!STI.hasLoadStoreOnCond2()) return false; @@ -655,6 +727,9 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, else return false; break; + case SystemZ::SELGR: + TieOps = true; + /* fall through */ case SystemZ::LOCGR: if (!STI.hasLoadStoreOnCond2()) return false; @@ -676,6 +751,8 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
UseMI.setDesc(get(NewUseOpc)); + if (TieOps) + UseMI.tieOperands(0, 1); UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal); if (DeleteDef) DefMI.eraseFromParent(); @@ -1285,6 +1362,11 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR); return true; + case SystemZ::SELRMux: + expandSELRPseudo(MI, SystemZ::SELR, SystemZ::SELFHR, + SystemZ::LOCRMux); + return true; + case SystemZ::STCMux: expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH); return true; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index 1485e63fe33..2edde175542 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -162,6 +162,8 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { unsigned HighOpcode) const; void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned HighOpcode) const; + void expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode, + unsigned HighOpcode, unsigned MixedOpcode) const; void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, unsigned Size) const; void expandLoadStackGuard(MachineInstr *MI) const; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 201a485c8df..91856893e3b 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -474,6 +474,11 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in { def MVCLU : SideEffectTernaryMemMemRSY<"mvclu", 0xEB8E, GR128, GR128>; } +// Move right. +let Predicates = [FeatureMiscellaneousExtensions3], + mayLoad = 1, mayStore = 1, Uses = [R0L] in + def MVCRL : SideEffectBinarySSE<"mvcrl", 0xE50A>; + // String moves. let mayLoad = 1, mayStore = 1, Defs = [CC] in defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>; @@ -482,6 +487,29 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in // Conditional move instructions //===----------------------------------------------------------------------===// +let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in { + // Select. + let isCommutable = 1 in { + // Expands to SELR or SELFHR or a branch-and-move sequence, + // depending on the choice of registers. + def SELRMux : CondBinaryRRFaPseudo<GRX32, GRX32, GRX32>; + defm SELFHR : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>; + defm SELR : CondBinaryRRFaPair<"selr", 0xB9F0, GR32, GR32, GR32>; + defm SELGR : CondBinaryRRFaPair<"selgr", 0xB9E3, GR64, GR64, GR64>; + } + + // Define AsmParser extended mnemonics for each general condition-code mask. + foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE", + "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in { + def SELRAsm#V : FixedCondBinaryRRFa<CV<V>, "selr", 0xB9F0, + GR32, GR32, GR32>; + def SELFHRAsm#V : FixedCondBinaryRRFa<CV<V>, "selfhr", 0xB9C0, + GRH32, GRH32, GRH32>; + def SELGRAsm#V : FixedCondBinaryRRFa<CV<V>, "selgr", 0xB9E3, + GR64, GR64, GR64>; + } +} + let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in { // Load immediate on condition. Matched via DAG pattern and created // by the PeepholeOptimizer via FoldImmediate. 
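Unlike LOCR, which has to tie one source to the destination, the SELR/SELGR forms defined above read two sources and write a separate destination, which is why SELRMux can stay a three-operand pseudo until SystemZExpandPseudo. A hedged sketch of the kind of code this targets (illustrative function, not from the patch):

  // On arch13 a simple select like this can become a compare followed by
  // one SELR with all three registers distinct, avoiding the extra move
  // or branch that LOCR-based lowering may otherwise need.
  int select_max(int a, int b) {
    return a > b ? a : b;
  }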
@@ -1244,6 +1272,43 @@ defm : RMWIByte<xor, bdaddr12pair, XI>; defm : RMWIByte<xor, bdaddr20pair, XIY>; //===----------------------------------------------------------------------===// +// Combined logical operations +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureMiscellaneousExtensions3], + Defs = [CC] in { + // AND with complement. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + def NCRK : BinaryRRFa<"ncrk", 0xB9F5, andc, GR32, GR32, GR32>; + def NCGRK : BinaryRRFa<"ncgrk", 0xB9E5, andc, GR64, GR64, GR64>; + } + + // OR with complement. + let CCValues = 0xC, CompareZeroCCMask = 0x8 in { + def OCRK : BinaryRRFa<"ocrk", 0xB975, orc, GR32, GR32, GR32>; + def OCGRK : BinaryRRFa<"ocgrk", 0xB965, orc, GR64, GR64, GR64>; + } + + // NAND. + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + def NNRK : BinaryRRFa<"nnrk", 0xB974, nand, GR32, GR32, GR32>; + def NNGRK : BinaryRRFa<"nngrk", 0xB964, nand, GR64, GR64, GR64>; + } + + // NOR. + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + def NORK : BinaryRRFa<"nork", 0xB976, nor, GR32, GR32, GR32>; + def NOGRK : BinaryRRFa<"nogrk", 0xB966, nor, GR64, GR64, GR64>; + } + + // NXOR. + let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in { + def NXRK : BinaryRRFa<"nxrk", 0xB977, nxor, GR32, GR32, GR32>; + def NXGRK : BinaryRRFa<"nxgrk", 0xB967, nxor, GR64, GR64, GR64>; + } +} + +//===----------------------------------------------------------------------===// // Multiplication //===----------------------------------------------------------------------===// @@ -1837,6 +1902,9 @@ let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in { let Predicates = [FeatureMessageSecurityAssist8] in def KMA : SideEffectTernaryMemMemMemRRFb<"kma", 0xB929, GR128, GR128, GR128>; + + let Predicates = [FeatureMessageSecurityAssist9] in + def KDSA : SideEffectBinaryMemRRE<"kdsa", 0xB93A, GR64, GR128>; } //===----------------------------------------------------------------------===// @@ -2017,7 +2085,12 @@ let Defs = [CC] in def : Pat<(ctlz GR64:$src), (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>; -// Population count. Counts bits set per byte. +// Population count. Counts bits set per byte or doubleword. +let Predicates = [FeatureMiscellaneousExtensions3] in { + let Defs = [CC] in + def POPCNTOpt : BinaryRRFc<"popcnt", 0xB9E1, GR64, GR64>; + def : Pat<(ctpop GR64:$src), (POPCNTOpt GR64:$src, 8)>; +} let Predicates = [FeaturePopulationCount], Defs = [CC] in def POPCNT : UnaryRRE<"popcnt", 0xB9E1, z_popcnt, GR64, GR64>; @@ -2048,6 +2121,17 @@ let mayLoad = 1, Defs = [CC] in let mayLoad = 1, mayStore = 1, Defs = [CC, R1D], Uses = [R0L, R1D] in def CMPSC : SideEffectBinaryMemMemRRE<"cmpsc", 0xB263, GR128, GR128>; +// Sort lists. +let Predicates = [FeatureEnhancedSort], + mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L, R1D] in + def SORTL : SideEffectBinaryMemMemRRE<"sortl", 0xB938, GR128, GR128>; + +// Deflate conversion call. +let Predicates = [FeatureDeflateConversion], + mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L, R1D] in + def DFLTCC : SideEffectTernaryMemMemRRFa<"dfltcc", 0xB939, + GR128, GR128, GR64>; + // Execute. 
let hasSideEffects = 1 in { def EX : SideEffectBinaryRX<"ex", 0x44, GR64>; diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index 71f05000f5d..261727f8905 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -249,6 +249,81 @@ let Predicates = [FeatureVectorPackedDecimal] in { } //===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureVectorEnhancements2] in { + // Load byte-reversed elements. + def VLBR : UnaryVRXGeneric<"vlbr", 0xE606>; + def VLBRH : UnaryVRX<"vlbrh", 0xE606, z_loadbswap, v128h, 16, 1>; + def VLBRF : UnaryVRX<"vlbrf", 0xE606, z_loadbswap, v128f, 16, 2>; + def VLBRG : UnaryVRX<"vlbrg", 0xE606, z_loadbswap, v128g, 16, 3>; + def VLBRQ : UnaryVRX<"vlbrq", 0xE606, null_frag, v128q, 16, 4>; + + // Load elements reversed. + def VLER : UnaryVRXGeneric<"vler", 0xE607>; + def VLERH : UnaryVRX<"vlerh", 0xE607, z_loadeswap, v128h, 16, 1>; + def VLERF : UnaryVRX<"vlerf", 0xE607, z_loadeswap, v128f, 16, 2>; + def VLERG : UnaryVRX<"vlerg", 0xE607, z_loadeswap, v128g, 16, 3>; + def : Pat<(v4f32 (z_loadeswap bdxaddr12only:$addr)), + (VLERF bdxaddr12only:$addr)>; + def : Pat<(v2f64 (z_loadeswap bdxaddr12only:$addr)), + (VLERG bdxaddr12only:$addr)>; + def : Pat<(v16i8 (z_loadeswap bdxaddr12only:$addr)), + (VLBRQ bdxaddr12only:$addr)>; + + // Load byte-reversed element. + def VLEBRH : TernaryVRX<"vlebrh", 0xE601, z_vlebri16, v128h, v128h, 2, imm32zx3>; + def VLEBRF : TernaryVRX<"vlebrf", 0xE603, z_vlebri32, v128f, v128f, 4, imm32zx2>; + def VLEBRG : TernaryVRX<"vlebrg", 0xE602, z_vlebri64, v128g, v128g, 8, imm32zx1>; + + // Load byte-reversed element and zero. + def VLLEBRZ : UnaryVRXGeneric<"vllebrz", 0xE604>; + def VLLEBRZH : UnaryVRX<"vllebrzh", 0xE604, z_vllebrzi16, v128h, 2, 1>; + def VLLEBRZF : UnaryVRX<"vllebrzf", 0xE604, z_vllebrzi32, v128f, 4, 2>; + def VLLEBRZG : UnaryVRX<"vllebrzg", 0xE604, z_vllebrzi64, v128g, 8, 3>; + def VLLEBRZE : UnaryVRX<"vllebrze", 0xE604, z_vllebrzli32, v128f, 4, 6>; + def : InstAlias<"lerv\t$V1, $XBD2", + (VLLEBRZE VR128:$V1, bdxaddr12only:$XBD2), 0>; + def : InstAlias<"ldrv\t$V1, $XBD2", + (VLLEBRZG VR128:$V1, bdxaddr12only:$XBD2), 0>; + + // Load byte-reversed element and replicate. + def VLBRREP : UnaryVRXGeneric<"vlbrrep", 0xE605>; + def VLBRREPH : UnaryVRX<"vlbrreph", 0xE605, z_replicate_loadbswapi16, v128h, 2, 1>; + def VLBRREPF : UnaryVRX<"vlbrrepf", 0xE605, z_replicate_loadbswapi32, v128f, 4, 2>; + def VLBRREPG : UnaryVRX<"vlbrrepg", 0xE605, z_replicate_loadbswapi64, v128g, 8, 3>; + + // Store byte-reversed elements. + def VSTBR : StoreVRXGeneric<"vstbr", 0xE60E>; + def VSTBRH : StoreVRX<"vstbrh", 0xE60E, z_storebswap, v128h, 16, 1>; + def VSTBRF : StoreVRX<"vstbrf", 0xE60E, z_storebswap, v128f, 16, 2>; + def VSTBRG : StoreVRX<"vstbrg", 0xE60E, z_storebswap, v128g, 16, 3>; + def VSTBRQ : StoreVRX<"vstbrq", 0xE60E, null_frag, v128q, 16, 4>; + + // Store elements reversed. 
+ def VSTER : StoreVRXGeneric<"vster", 0xE60F>; + def VSTERH : StoreVRX<"vsterh", 0xE60F, z_storeeswap, v128h, 16, 1>; + def VSTERF : StoreVRX<"vsterf", 0xE60F, z_storeeswap, v128f, 16, 2>; + def VSTERG : StoreVRX<"vsterg", 0xE60F, z_storeeswap, v128g, 16, 3>; + def : Pat<(z_storeeswap (v4f32 VR128:$val), bdxaddr12only:$addr), + (VSTERF VR128:$val, bdxaddr12only:$addr)>; + def : Pat<(z_storeeswap (v2f64 VR128:$val), bdxaddr12only:$addr), + (VSTERG VR128:$val, bdxaddr12only:$addr)>; + def : Pat<(z_storeeswap (v16i8 VR128:$val), bdxaddr12only:$addr), + (VSTBRQ VR128:$val, bdxaddr12only:$addr)>; + + // Store byte-reversed element. + def VSTEBRH : StoreBinaryVRX<"vstebrh", 0xE609, z_vstebri16, v128h, 2, imm32zx3>; + def VSTEBRF : StoreBinaryVRX<"vstebrf", 0xE60B, z_vstebri32, v128f, 4, imm32zx2>; + def VSTEBRG : StoreBinaryVRX<"vstebrg", 0xE60A, z_vstebri64, v128g, 8, imm32zx1>; + def : InstAlias<"sterv\t$V1, $XBD2", + (VSTEBRF VR128:$V1, bdxaddr12only:$XBD2, 0), 0>; + def : InstAlias<"stdrv\t$V1, $XBD2", + (VSTEBRG VR128:$V1, bdxaddr12only:$XBD2, 0), 0>; +} + +//===----------------------------------------------------------------------===// // Selects and permutes //===----------------------------------------------------------------------===// @@ -706,6 +781,10 @@ let Predicates = [FeatureVector] in { def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z), (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>; + // Shift left double by bit. + let Predicates = [FeatureVectorEnhancements2] in + def VSLD : TernaryVRId<"vsld", 0xE786, int_s390_vsld, v128b, v128b, 0>; + // Shift right arithmetic. def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>; @@ -718,6 +797,10 @@ let Predicates = [FeatureVector] in { // Shift right logical by byte. def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>; + // Shift right double by bit. + let Predicates = [FeatureVectorEnhancements2] in + def VSRD : TernaryVRId<"vsrd", 0xE787, int_s390_vsrd, v128b, v128b, 0>; + // Subtract. def VS : BinaryVRRcGeneric<"vs", 0xE7F7>; def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>; @@ -945,23 +1028,41 @@ let Predicates = [FeatureVector] in { } } - // Convert from fixed 64-bit. + // Convert from fixed. let Uses = [FPC], mayRaiseFPException = 1 in { def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>; def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>; def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>; } def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>; + let Predicates = [FeatureVectorEnhancements2] in { + let Uses = [FPC], mayRaiseFPException = 1 in { + let isAsmParserOnly = 1 in + def VCFPS : TernaryVRRaFloatGeneric<"vcfps", 0xE7C3>; + def VCEFB : TernaryVRRa<"vcefb", 0xE7C3, null_frag, v128sb, v128g, 2, 0>; + def WCEFB : TernaryVRRa<"wcefb", 0xE7C3, null_frag, v32sb, v32f, 2, 8>; + } + def : FPConversion<VCEFB, sint_to_fp, v128sb, v128f, 0, 0>; + } - // Convert from logical 64-bit. + // Convert from logical. 
let Uses = [FPC], mayRaiseFPException = 1 in { def VCDLG : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>; def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>; def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>; } def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>; + let Predicates = [FeatureVectorEnhancements2] in { + let Uses = [FPC], mayRaiseFPException = 1 in { + let isAsmParserOnly = 1 in + def VCFPL : TernaryVRRaFloatGeneric<"vcfpl", 0xE7C1>; + def VCELFB : TernaryVRRa<"vcelfb", 0xE7C1, null_frag, v128sb, v128g, 2, 0>; + def WCELFB : TernaryVRRa<"wcelfb", 0xE7C1, null_frag, v32sb, v32f, 2, 8>; + } + def : FPConversion<VCELFB, uint_to_fp, v128sb, v128f, 0, 0>; + } - // Convert to fixed 64-bit. + // Convert to fixed. let Uses = [FPC], mayRaiseFPException = 1 in { def VCGD : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>; def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>; @@ -969,8 +1070,18 @@ let Predicates = [FeatureVector] in { } // Rounding mode should agree with SystemZInstrFP.td. def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>; + let Predicates = [FeatureVectorEnhancements2] in { + let Uses = [FPC], mayRaiseFPException = 1 in { + let isAsmParserOnly = 1 in + def VCSFP : TernaryVRRaFloatGeneric<"vcsfp", 0xE7C2>; + def VCFEB : TernaryVRRa<"vcfeb", 0xE7C2, null_frag, v128sb, v128g, 2, 0>; + def WCFEB : TernaryVRRa<"wcfeb", 0xE7C2, null_frag, v32sb, v32f, 2, 8>; + } + // Rounding mode should agree with SystemZInstrFP.td. + def : FPConversion<VCFEB, fp_to_sint, v128f, v128sb, 0, 5>; + } - // Convert to logical 64-bit. + // Convert to logical. let Uses = [FPC], mayRaiseFPException = 1 in { def VCLGD : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>; def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>; @@ -978,6 +1089,16 @@ let Predicates = [FeatureVector] in { } // Rounding mode should agree with SystemZInstrFP.td. def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>; + let Predicates = [FeatureVectorEnhancements2] in { + let Uses = [FPC], mayRaiseFPException = 1 in { + let isAsmParserOnly = 1 in + def VCLFP : TernaryVRRaFloatGeneric<"vclfp", 0xE7C0>; + def VCLFEB : TernaryVRRa<"vclfeb", 0xE7C0, null_frag, v128sb, v128g, 2, 0>; + def WCLFEB : TernaryVRRa<"wclfeb", 0xE7C0, null_frag, v32sb, v32f, 2, 8>; + } + // Rounding mode should agree with SystemZInstrFP.td. + def : FPConversion<VCLFEB, fp_to_uint, v128f, v128sb, 0, 5>; + } // Divide. 
let Uses = [FPC], mayRaiseFPException = 1 in { @@ -1568,6 +1689,24 @@ let Predicates = [FeatureVector] in { z_vstrcz_cc, v128f, v128f, 2, 2>; } +let Predicates = [FeatureVectorEnhancements2] in { + defm VSTRS : TernaryExtraVRRdGeneric<"vstrs", 0xE78B>; + defm VSTRSB : TernaryExtraVRRd<"vstrsb", 0xE78B, + z_vstrs_cc, v128b, v128b, 0>; + defm VSTRSH : TernaryExtraVRRd<"vstrsh", 0xE78B, + z_vstrs_cc, v128b, v128h, 1>; + defm VSTRSF : TernaryExtraVRRd<"vstrsf", 0xE78B, + z_vstrs_cc, v128b, v128f, 2>; + let Defs = [CC] in { + def VSTRSZB : TernaryVRRd<"vstrszb", 0xE78B, + z_vstrsz_cc, v128b, v128b, 0, 2>; + def VSTRSZH : TernaryVRRd<"vstrszh", 0xE78B, + z_vstrsz_cc, v128b, v128h, 1, 2>; + def VSTRSZF : TernaryVRRd<"vstrszf", 0xE78B, + z_vstrsz_cc, v128b, v128f, 2, 2>; + } +} + //===----------------------------------------------------------------------===// // Packed-decimal instructions //===----------------------------------------------------------------------===// @@ -1579,6 +1718,10 @@ let Predicates = [FeatureVectorPackedDecimal] in { def VUPKZ : StoreLengthVSI<"vupkz", 0xE63C, null_frag, 0>; let Defs = [CC] in { + let Predicates = [FeatureVectorPackedDecimalEnhancement] in { + def VCVBOpt : TernaryVRRi<"vcvb", 0xE650, GR32>; + def VCVBGOpt : TernaryVRRi<"vcvbg", 0xE652, GR64>; + } def VCVB : BinaryVRRi<"vcvb", 0xE650, GR32>; def VCVBG : BinaryVRRi<"vcvbg", 0xE652, GR64>; def VCVD : TernaryVRIi<"vcvd", 0xE658, GR32>; diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index 62f33f523ae..15bd12bc98a 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -191,6 +191,12 @@ def SDT_ZVecTernary : SDTypeProfile<1, 3, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>; +def SDT_ZVecTernaryConvCC : SDTypeProfile<2, 3, + [SDTCisVec<0>, + SDTCisVT<1, i32>, + SDTCisVec<2>, + SDTCisSameAs<2, 3>, + SDTCisSameAs<0, 4>]>; def SDT_ZVecTernaryInt : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, @@ -278,6 +284,10 @@ def z_loadbswap : SDNode<"SystemZISD::LRV", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def z_storebswap : SDNode<"SystemZISD::STRV", SDTStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def z_loadeswap : SDNode<"SystemZISD::VLER", SDTLoad, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def z_storeeswap : SDNode<"SystemZISD::VSTER", SDTStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest>; @@ -337,6 +347,10 @@ def z_vstrc_cc : SDNode<"SystemZISD::VSTRC_CC", SDT_ZVecQuaternaryIntCC>; def z_vstrcz_cc : SDNode<"SystemZISD::VSTRCZ_CC", SDT_ZVecQuaternaryIntCC>; +def z_vstrs_cc : SDNode<"SystemZISD::VSTRS_CC", + SDT_ZVecTernaryConvCC>; +def z_vstrsz_cc : SDNode<"SystemZISD::VSTRSZ_CC", + SDT_ZVecTernaryConvCC>; def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvIntCC>; class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW> @@ -661,6 +675,18 @@ def z_usub : PatFrags<(ops node:$src1, node:$src2), [(z_usubo node:$src1, node:$src2), (sub node:$src1, node:$src2)]>; +// Combined logical operations. 
+def andc : PatFrag<(ops node:$src1, node:$src2), + (and node:$src1, (not node:$src2))>; +def orc : PatFrag<(ops node:$src1, node:$src2), + (or node:$src1, (not node:$src2))>; +def nand : PatFrag<(ops node:$src1, node:$src2), + (not (and node:$src1, node:$src2))>; +def nor : PatFrag<(ops node:$src1, node:$src2), + (not (or node:$src1, node:$src2))>; +def nxor : PatFrag<(ops node:$src1, node:$src2), + (not (xor node:$src1, node:$src2))>; + // Fused multiply-subtract, using the natural operand order. def any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), (any_fma node:$src1, node:$src2, (fneg node:$src3))>; @@ -722,6 +748,10 @@ def z_replicate_loadi32 : z_replicate_load<i32, load>; def z_replicate_loadi64 : z_replicate_load<i64, load>; def z_replicate_loadf32 : z_replicate_load<f32, load>; def z_replicate_loadf64 : z_replicate_load<f64, load>; +// Byte-swapped replicated vector element loads. +def z_replicate_loadbswapi16 : z_replicate_load<i32, z_loadbswap16>; +def z_replicate_loadbswapi32 : z_replicate_load<i32, z_loadbswap32>; +def z_replicate_loadbswapi64 : z_replicate_load<i64, z_loadbswap64>; // Load a scalar and insert it into a single element of a vector. class z_vle<ValueType scalartype, SDPatternOperator load> @@ -734,6 +764,10 @@ def z_vlei32 : z_vle<i32, load>; def z_vlei64 : z_vle<i64, load>; def z_vlef32 : z_vle<f32, load>; def z_vlef64 : z_vle<f64, load>; +// Byte-swapped vector element loads. +def z_vlebri16 : z_vle<i32, z_loadbswap16>; +def z_vlebri32 : z_vle<i32, z_loadbswap32>; +def z_vlebri64 : z_vle<i64, z_loadbswap64>; // Load a scalar and insert it into the low element of the high i64 of a // zeroed vector. @@ -778,6 +812,18 @@ def z_vllezlf32 : PatFrag<(ops node:$addr), (v2i64 (bitconvert (v4f32 immAllZerosV))))>; +// Byte-swapped variants. +def z_vllebrzi16 : z_vllez<i32, z_loadbswap16, 3>; +def z_vllebrzi32 : z_vllez<i32, z_loadbswap32, 1>; +def z_vllebrzli32 : z_vllez<i32, z_loadbswap32, 0>; +def z_vllebrzi64 : PatFrags<(ops node:$addr), + [(z_vector_insert immAllZerosV, + (i64 (z_loadbswap64 node:$addr)), + (i32 0)), + (z_join_dwords (i64 (z_loadbswap64 node:$addr)), + (i64 0))]>; + + // Store one element of a vector. class z_vste<ValueType scalartype, SDPatternOperator store> : PatFrag<(ops node:$vec, node:$addr, node:$index), @@ -789,6 +835,10 @@ def z_vstei32 : z_vste<i32, store>; def z_vstei64 : z_vste<i64, store>; def z_vstef32 : z_vste<f32, store>; def z_vstef64 : z_vste<f64, store>; +// Byte-swapped vector element stores. +def z_vstebri16 : z_vste<i32, z_storebswap16>; +def z_vstebri32 : z_vste<i32, z_storebswap32>; +def z_vstebri64 : z_vste<i64, z_storebswap64>; // Arithmetic negation on vectors. 
def z_vneg : PatFrag<(ops node:$x), (sub immAllZerosV, node:$x)>; diff --git a/llvm/lib/Target/SystemZ/SystemZProcessors.td b/llvm/lib/Target/SystemZ/SystemZProcessors.td index 3dd0ea52b71..b27c25beb58 100644 --- a/llvm/lib/Target/SystemZ/SystemZProcessors.td +++ b/llvm/lib/Target/SystemZ/SystemZProcessors.td @@ -35,3 +35,5 @@ def : ProcessorModel<"z13", Z13Model, Arch11SupportedFeatures.List>; def : ProcessorModel<"arch12", Z14Model, Arch12SupportedFeatures.List>; def : ProcessorModel<"z14", Z14Model, Arch12SupportedFeatures.List>; +def : ProcessorModel<"arch13", Arch13Model, Arch13SupportedFeatures.List>; + diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 2076a060747..e7cd6871dbb 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -96,17 +96,21 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, if (!DoneRegs.insert(Reg).second) continue; - for (auto &Use : MRI->use_instructions(Reg)) { + for (auto &Use : MRI->reg_instructions(Reg)) { // For LOCRMux, see if the other operand is already a high or low - // register, and in that case give the correpsonding hints for + // register, and in that case give the corresponding hints for // VirtReg. LOCR instructions need both operands in either high or - // low parts. - if (Use.getOpcode() == SystemZ::LOCRMux) { + // low parts. Same handling for SELRMux. + if (Use.getOpcode() == SystemZ::LOCRMux || + Use.getOpcode() == SystemZ::SELRMux) { MachineOperand &TrueMO = Use.getOperand(1); MachineOperand &FalseMO = Use.getOperand(2); const TargetRegisterClass *RC = TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI), getRC32(TrueMO, VRM, MRI)); + if (Use.getOpcode() == SystemZ::SELRMux) + RC = TRI->getCommonSubClass(RC, + getRC32(Use.getOperand(0), VRM, MRI)); if (RC && RC != &SystemZ::GRX32BitRegClass) { addHints(Order, Hints, RC, MRI); // Return true to make these hints the only regs available to diff --git a/llvm/lib/Target/SystemZ/SystemZSchedule.td b/llvm/lib/Target/SystemZ/SystemZSchedule.td index c56bb17e8f1..98eca280224 100644 --- a/llvm/lib/Target/SystemZ/SystemZSchedule.td +++ b/llvm/lib/Target/SystemZ/SystemZSchedule.td @@ -59,6 +59,7 @@ def VBU : SchedWrite; // Virtual branching unit def MCD : SchedWrite; // Millicode +include "SystemZScheduleArch13.td" include "SystemZScheduleZ14.td" include "SystemZScheduleZ13.td" include "SystemZScheduleZEC12.td" diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleArch13.td b/llvm/lib/Target/SystemZ/SystemZScheduleArch13.td new file mode 100644 index 00000000000..9f82f24d0e8 --- /dev/null +++ b/llvm/lib/Target/SystemZ/SystemZScheduleArch13.td @@ -0,0 +1,1695 @@ +//-- SystemZScheduleArch13.td - SystemZ Scheduling Definitions ----*- tblgen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Arch13 to support instruction +// scheduling and other instruction cost heuristics. +// +// Pseudos expanded right after isel do not need to be modelled here. 
+//
+//===----------------------------------------------------------------------===//
+
+def Arch13Model : SchedMachineModel {
+
+  let UnsupportedFeatures = Arch13UnsupportedFeatures.List;
+
+  let IssueWidth = 6;              // Number of instructions decoded per cycle.
+  let MicroOpBufferSize = 60;      // Issue queues
+  let LoadLatency = 1;             // Optimistic load latency.
+
+  let PostRAScheduler = 1;
+
+  // Extra cycles for a mispredicted branch.
+  let MispredictPenalty = 20;
+}
+
+let SchedModel = Arch13Model in {
+// These definitions need the SchedModel value. They could be put in a
+// subtarget common include file, but it seems the include system in Tablegen
+// currently (2016) rejects multiple includes of the same file.
+
+// Decoder grouping rules
+let NumMicroOps = 1 in {
+  def : WriteRes<NormalGr, []>;
+  def : WriteRes<BeginGroup, []> { let BeginGroup = 1; }
+  def : WriteRes<EndGroup, []> { let EndGroup = 1; }
+}
+def : WriteRes<Cracked, []> {
+  let NumMicroOps = 2;
+  let BeginGroup = 1;
+}
+def : WriteRes<GroupAlone, []> {
+  let NumMicroOps = 3;
+  let BeginGroup = 1;
+  let EndGroup = 1;
+}
+def : WriteRes<GroupAlone2, []> {
+  let NumMicroOps = 6;
+  let BeginGroup = 1;
+  let EndGroup = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+  let NumMicroOps = 9;
+  let BeginGroup = 1;
+  let EndGroup = 1;
+}
+
+// Incoming latency removed from a register operand that is used together
+// with a memory operand by the instruction.
+def : ReadAdvance<RegReadAdv, 4>;
+
+// LoadLatency (above) is not used for instructions in this file. This is
+// instead the role of LSULatency, which is the latency value added to the
+// result of loads and instructions with folded memory operands.
+def : WriteRes<LSULatency, []> { let Latency = 4; let NumMicroOps = 0; }
+
+let NumMicroOps = 0 in {
+  foreach L = 1-30 in
+    def : WriteRes<!cast<SchedWrite>("WLat"#L), []> { let Latency = L; }
+}
+
+// Execution units.
+def Arch13_FXaUnit : ProcResource<2>;
+def Arch13_FXbUnit : ProcResource<2>;
+def Arch13_LSUnit : ProcResource<2>;
+def Arch13_VecUnit : ProcResource<2>;
+def Arch13_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ }
+def Arch13_VBUnit : ProcResource<2>;
+def Arch13_MCD : ProcResource<1>;
+
+// Subtarget-specific definitions of scheduling resources.
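+// As an illustrative reading of the mapping below (not an exhaustive
+// description): each SchedWrite is tied to one Arch13 execution unit, a
+// numbered variant such as FXa2 occupies its FXa unit for two cycles via
+// ResourceCycles, and VecFPd, being blocking (BufferSize = 1) with 30
+// resource cycles, serializes back-to-back divide / square-root operations.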
+let NumMicroOps = 0 in { + def : WriteRes<FXa, [Arch13_FXaUnit]>; + def : WriteRes<FXb, [Arch13_FXbUnit]>; + def : WriteRes<LSU, [Arch13_LSUnit]>; + def : WriteRes<VecBF, [Arch13_VecUnit]>; + def : WriteRes<VecDF, [Arch13_VecUnit]>; + def : WriteRes<VecDFX, [Arch13_VecUnit]>; + def : WriteRes<VecMul, [Arch13_VecUnit]>; + def : WriteRes<VecStr, [Arch13_VecUnit]>; + def : WriteRes<VecXsPm, [Arch13_VecUnit]>; + foreach Num = 2-5 in { let ResourceCycles = [Num] in { + def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Arch13_FXaUnit]>; + def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Arch13_FXbUnit]>; + def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Arch13_LSUnit]>; + def : WriteRes<!cast<SchedWrite>("VecBF"#Num), [Arch13_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecDF"#Num), [Arch13_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecDFX"#Num), [Arch13_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecMul"#Num), [Arch13_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecStr"#Num), [Arch13_VecUnit]>; + def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Arch13_VecUnit]>; + }} + + def : WriteRes<VecFPd, [Arch13_VecFPdUnit]> { let ResourceCycles = [30]; } + + def : WriteRes<VBU, [Arch13_VBUnit]>; // Virtual Branching Unit +} + +def : WriteRes<MCD, [Arch13_MCD]> { let NumMicroOps = 3; + let BeginGroup = 1; + let EndGroup = 1; } + +// -------------------------- INSTRUCTIONS ---------------------------------- // + +// InstRW constructs have been used in order to preserve the +// readability of the InstrInfo files. + +// For each instruction, as matched by a regexp, provide a list of +// resources that it needs. These will be combined into a SchedClass. + +//===----------------------------------------------------------------------===// +// Stack allocation +//===----------------------------------------------------------------------===// + +// Pseudo -> LA / LAY +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ADJDYNALLOC$")>; + +//===----------------------------------------------------------------------===// +// Branch instructions +//===----------------------------------------------------------------------===// + +// Branch +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?BRC(L)?(Asm.*)?$")>; +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?J(G)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?BC(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>; +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>; +def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2], + (instregex "B(R)?X(H|L).*$")>; + +// Compare and branch +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>; +def : InstRW<[WLat1, FXb2, GroupAlone], + (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Trap instructions +//===----------------------------------------------------------------------===// + +// Trap +def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Cond)?Trap$")>; + +// Compare and trap +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?(I|R)T(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(G)?RT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, 
LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Call and return instructions +//===----------------------------------------------------------------------===// + +// Call +def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; + +// Return +def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn$")>; + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Moves +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MV(G|H)?HI$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MVI(Y)?$")>; + +// Move character +def : InstRW<[WLat1, FXb, LSU3, GroupAlone], (instregex "MVC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "MVCRL$")>; + +// Pseudo -> reg move +def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>; + +// Loads +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(Y|FH|RL|Mux)?$")>; +def : InstRW<[LSULatency, LSULatency, LSU, NormalGr], (instregex "LCBB$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LG(RL)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L128$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIH(F|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>; + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR(Mux)?$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "L(FH|G)?AT$")>; + +// Load and test +def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXa, NormalGr], (instregex "LT(G)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LT(G)?R$")>; + +// Stores +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STG(RL)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST128$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(Y|FH|RL|Mux)?$")>; + +// String moves. 
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVST$")>; + +//===----------------------------------------------------------------------===// +// Conditional move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOCRMux$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|FH)?R(Asm.*)?$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], + (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>; + +def : InstRW<[WLat2, FXa, NormalGr], (instregex "SELRMux$")>; +def : InstRW<[WLat2, FXa, NormalGr], (instregex "SEL(G|FH)?R(Asm.*)?$")>; + +//===----------------------------------------------------------------------===// +// Sign extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "L(B|H|G)R$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(B|H|F)R$")>; + +def : InstRW<[WLat1LSU, WLat1LSU, FXa, LSU, NormalGr], (instregex "LTGF$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LTGFR$")>; + +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LB(H|Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(Y)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(H|Mux|RL)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(B|H|F)$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(H|F)RL$")>; + +//===----------------------------------------------------------------------===// +// Zero extensions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLCR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLHR(Mux)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLG(C|H|F|T)R$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLC(Mux)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLH(Mux)?$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LL(C|H)H$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLHRL$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLG(C|H|F|T|HRL|FRL)$")>; + +// Load and zero rightmost byte +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLZRGF$")>; + +// Load and trap +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "LLG(F|T)?AT$")>; + +//===----------------------------------------------------------------------===// +// Truncations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STC(H|Y|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STH(H|Y|RL|Mux)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STCM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Multi-register moves +//===----------------------------------------------------------------------===// + +// Load multiple (estimated average of 5 ops) +def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LM(H|Y|G)?$")>; + +// Load multiple disjoint +def : InstRW<[WLat30, WLat30, MCD], (instregex "LMD$")>; + +// Store multiple +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "STM(G|H|Y)?$")>; + 
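+// A note on the operand latencies used above: the combined WLat<N>LSU
+// classes are assumed (per their definitions in SystemZSchedule.td) to
+// express N cycles of latency on top of the LSU load latency, for
+// instructions with a folded memory operand.
+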
+//===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LRV(G)?R$")>; +def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LRV(G|H)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STRV(G|H)?$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCIN$")>; + +//===----------------------------------------------------------------------===// +// Load address instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LA(Y|RL)?$")>; + +// Load the Global Offset Table address ( -> larl ) +def : InstRW<[WLat1, FXa, NormalGr], (instregex "GOT$")>; + +//===----------------------------------------------------------------------===// +// Absolute and Negation +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LP(G)?R$")>; +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "L(N|P)GFR$")>; +def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LN(R|GR)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "LC(R|GR)$")>; +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "LCGFR$")>; + +//===----------------------------------------------------------------------===// +// Insertion +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "IC(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "IC32(Y)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, WLat1LSU, FXa, LSU, NormalGr], + (instregex "ICM(H|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "II(F|H|L)Mux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILL(64)?$")>; + +//===----------------------------------------------------------------------===// +// Addition +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "A(Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AIH$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AFI(Mux)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGFI$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHI(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHIMux(K)?$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AL(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AL(FI|HSIK)$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "ALG(F)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGHSIK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGF(I|R)$")>; +def : 
InstRW<[WLat1, FXa, NormalGr], (instregex "ALGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>; + +// Logical addition with carry +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "ALC(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "ALC(G)?R$")>; + +// Add with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "AG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "AGFR$")>; + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "S(G|Y)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SH(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLFI$")>; +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SL(G|GF|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGF(I|R)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>; + +// Subtraction with borrow +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone], + (instregex "SLB(G)?$")>; +def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "SLB(G)?R$")>; + +// Subtraction with sign extension (16/32 -> 64) +def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "SG(F|H)$")>; +def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "SGFR$")>; + +//===----------------------------------------------------------------------===// +// AND +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "N(G|Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "NI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "NC$")>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "O(G|Y)?$")>; +def : 
InstRW<[WLat1, FXa, NormalGr], (instregex "OGR(K)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "OI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OI(FMux|HMux|LMux)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILH(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILL(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "OC$")>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "X(G|Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "XI(Y)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIFMux$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XGR(K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIHF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XILF(64)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "XR(K)?$")>; +def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "XC$")>; + +//===----------------------------------------------------------------------===// +// Combined logical operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NC(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "OC(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NN(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NO(G)?RK$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "NX(G)?RK$")>; + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat5LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MS(GF|Y)?$")>; +def : InstRW<[WLat5, FXa, NormalGr], (instregex "MS(R|FI)$")>; +def : InstRW<[WLat7LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MSG$")>; +def : InstRW<[WLat7, FXa, NormalGr], (instregex "MSGR$")>; +def : InstRW<[WLat5, FXa, NormalGr], (instregex "MSGF(I|R)$")>; +def : InstRW<[WLat8LSU, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MLG$")>; +def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MGHI$")>; +def : InstRW<[WLat4, FXa, NormalGr], (instregex "MHI$")>; +def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MH(Y)?$")>; +def : InstRW<[WLat6, FXa2, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[WLat6LSU, RegReadAdv, FXa2, LSU, GroupAlone], + (instregex "M(FY|L)?$")>; +def : InstRW<[WLat8, RegReadAdv, FXa, LSU, NormalGr], (instregex "MGH$")>; +def : InstRW<[WLat12, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MG$")>; +def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MGRK$")>; +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MSC$")>; +def : InstRW<[WLat8LSU, WLat8LSU, RegReadAdv, FXa, LSU, NormalGr], + (instregex "MSGC$")>; +def : InstRW<[WLat6, WLat6, FXa, NormalGr], (instregex "MSRKC$")>; +def : InstRW<[WLat8, WLat8, FXa, NormalGr], (instregex "MSGRKC$")>; + 
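+// As the MLGR / MGRK entries above illustrate, widening multiplies that
+// presumably produce their 128-bit result in a register pair are modeled
+// here as two FXa micro-ops decoding in a group of their own (GroupAlone).
+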
+//===----------------------------------------------------------------------===// +// Division and remainder +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>; +def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>; +def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2], + (instregex "DSG(F)?$")>; +def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>; +def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>; +def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], + (instregex "DL(G)?$")>; + +//===----------------------------------------------------------------------===// +// Shifts +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>; +def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2], + (instregex "S(L|R)D(A|L)$")>; + +// Rotate +def : InstRW<[WLat2LSU, FXa, LSU, NormalGr], (instregex "RLL(G)?$")>; + +// Rotate and insert +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBG(N|32)?$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBH(G|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBL(G|H|L)$")>; +def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBMux$")>; + +// Rotate and Select +def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "R(N|O|X)SBG$")>; + +//===----------------------------------------------------------------------===// +// Comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "C(G|Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(F|H)I(Mux)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CG(F|H)I$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CG(HSI|RL)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?R$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CIH$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CHSI$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CL(Y|Mux)?$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLFHSI$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLFI(Mux)?$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLG$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLG(HRL|HSI)$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLGF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGFRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGF(I|R)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGRL$")>; +def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLHF$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLH(RL|HSI)$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLIH$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLI(Y)?$")>; +def : InstRW<[WLat1, 
FXb, NormalGr], (instregex "CLR$")>; +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>; + +// Compare halfword +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CHRL$")>; +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGH$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGHRL$")>; +def : InstRW<[WLat2LSU, FXa, FXb, LSU, Cracked], (instregex "CHHSI$")>; + +// Compare with sign extension (32 -> 64) +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGF$")>; +def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGFRL$")>; +def : InstRW<[WLat2, FXb, NormalGr], (instregex "CGFR$")>; + +// Compare logical character +def : InstRW<[WLat6, FXb, LSU2, Cracked], (instregex "CLC$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLCL(E|U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLST$")>; + +// Test under mask +def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "TM(Y)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TM(H|L)Mux$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHL(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLH(64)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLL(64)?$")>; + +// Compare logical characters under mask +def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], + (instregex "CLM(H|Y)?$")>; + +//===----------------------------------------------------------------------===// +// Prefetch and execution hint +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU, NormalGr], (instregex "PFD(RL)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "BPP$")>; +def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "NIAI$")>; + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, EndGroup], (instregex "Serialize$")>; + +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAA(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAAL(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAN(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAO(G)?$")>; +def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAX(G)?$")>; + +// Test and set +def : InstRW<[WLat2LSU, FXb, LSU, EndGroup], (instregex "TS$")>; + +// Compare and swap +def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone], + (instregex "CS(G|Y)?$")>; + +// Compare double and swap +def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2], + (instregex "CDS(Y)?$")>; +def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3, + GroupAlone3], (instregex "CDSG$")>; + +// Compare and swap and store +def : InstRW<[WLat30, MCD], (instregex "CSST$")>; + +// Perform locked operation +def : InstRW<[WLat30, MCD], (instregex "PLO$")>; + +// Load/store pair from/to quadword +def : InstRW<[WLat4LSU, LSU2, GroupAlone], (instregex "LPQ$")>; +def : InstRW<[WLat1, FXb2, LSU, GroupAlone], (instregex "STPQ$")>; + +// Load pair disjoint +def : 
InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>; + +//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>; +def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2], + (instregex "TRT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT(R)?E(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(CUUTF|CUTFU)(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "KM(C|F|O|CTR|A)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "(KIMD|KLMD|KMAC|KDSA)$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], + (instregex "(PCC|PPNO|PRNO)$")>; + +//===----------------------------------------------------------------------===// +// Guarded storage +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LGG$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLGFSG$")>; +def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>; + +//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat20, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2], + (instregex "CVBG$")>; +def : InstRW<[WLat20, RegReadAdv, FXb, VecDF, LSU, GroupAlone2], + (instregex "CVB(Y)?$")>; +def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>; +def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>; +def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; +def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>; +def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>; + +def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2], + (instregex "(A|S|ZA)P$")>; +def : InstRW<[WLat1, FXb, VecDFX2, LSU3, GroupAlone2], (instregex "MP$")>; +def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "DP$")>; +def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>; +def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>; +def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>; +def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>; + +//===----------------------------------------------------------------------===// +// Access registers +//===----------------------------------------------------------------------===// + +// Extract/set/copy access register +def : InstRW<[WLat3, LSU, NormalGr], (instregex "(EAR|SAR|CPYA)$")>; + +// Load address extended +def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>; + +// Load/store access multiple (not modeled precisely) +def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>; +def : 
InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>; + +//===----------------------------------------------------------------------===// +// Program mask and addressing mode +//===----------------------------------------------------------------------===// + +// Insert Program Mask +def : InstRW<[WLat3, FXa, EndGroup], (instregex "IPM$")>; + +// Set Program Mask +def : InstRW<[WLat3, LSU, EndGroup], (instregex "SPM$")>; + +// Branch and link +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BAL(R)?$")>; + +// Test addressing mode +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TAM$")>; + +// Set addressing mode +def : InstRW<[WLat1, FXb, EndGroup], (instregex "SAM(24|31|64)$")>; + +// Branch (and save) and set mode. +def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BSM$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>; + +//===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +// Transaction begin +def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>; + +// Transaction end +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>; + +// Transaction abort +def : InstRW<[WLat30, MCD], (instregex "TABORT$")>; + +// Extract Transaction Nesting Depth +def : InstRW<[WLat1, FXa, NormalGr], (instregex "ETND$")>; + +// Nontransactional store +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "NTSTG$")>; + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "PPA$")>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +//===----------------------------------------------------------------------===// + +// Find leftmost one +def : InstRW<[WLat5, WLat5, FXa2, GroupAlone], (instregex "FLOGR$")>; + +// Population count +def : InstRW<[WLat3, WLat3, FXa, NormalGr], (instregex "POPCNT(Opt)?$")>; + +// String instructions +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "SRST(U)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CUSE$")>; + +// Various complex instructions +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CFC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, WLat30, WLat30, MCD], + (instregex "UPT$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CKSM$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CMPSC$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "SORTL$")>; +def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "DFLTCC$")>; + +// Execute +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "EX(RL)?$")>; + +//===----------------------------------------------------------------------===// +// .insn directive instructions +//===----------------------------------------------------------------------===// + +// An "empty" sched-class will be assigned instead of the "invalid sched-class". +// getNumDecoderSlots() will then return 1 instead of 0. 
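+// (The Insn* pseudos represent raw encodings supplied via the assembler's
+// .insn directive, so no meaningful scheduling information is available
+// for them.)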
+def : InstRW<[], (instregex "Insn.*")>; + + +// ----------------------------- Floating point ----------------------------- // + +//===----------------------------------------------------------------------===// +// FP: Move instructions +//===----------------------------------------------------------------------===// + +// Load zero +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>; +def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>; + +// Load +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>; +def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>; + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], + (instregex "LTXBR(Compare)?$")>; + +// Copy sign +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>; + +//===----------------------------------------------------------------------===// +// FP: Load instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>; + +//===----------------------------------------------------------------------===// +// FP: Store instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>; + +//===----------------------------------------------------------------------===// +// FP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEDBR(A)?$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "L(E|D)XBR(A)?$")>; + +// Load lengthened +def : InstRW<[WLat6LSU, VecBF, LSU, NormalGr], (instregex "LDEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LDEBR$")>; +def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)B$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>; +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], + (instregex "C(F|G)(E|D)BR(A)?$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], + (instregex "C(F|G)XBR(A)?$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, GroupAlone], (instregex "CLFEBR$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLFDBR$")>; +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLG(E|D)BR$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>; + +//===----------------------------------------------------------------------===// +// FP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / 
Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)B$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)BR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXBR$")>; + +// Load FP integer +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)BR(A)?$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXBR(A)?$")>; + +//===----------------------------------------------------------------------===// +// FP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D)B$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXBR$")>; + +// Subtraction +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D)B$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D)BR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXBR$")>; + +// Multiply +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|EE)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|EE)BR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXDB$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDBR$")>; +def : InstRW<[WLat15, VecDF4, GroupAlone], (instregex "MXBR$")>; + +// Multiply and add / subtract +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)EB$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)EBR$")>; +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(A|S)DBR$")>; + +// Division +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], + (instregex "D(E|D)B$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)BR$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXBR$")>; + +// Divide to integer +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "DI(E|D)BR$")>; + +//===----------------------------------------------------------------------===// +// FP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat3LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "(K|C)(E|D)B$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "(K|C)(E|D)BR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>; + +// Test Data Class +def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>; +def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>; + +//===----------------------------------------------------------------------===// +// FP: Floating-point control register instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4, FXa, LSU, GroupAlone], (instregex "EFPC$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "STFPC$")>; +def : InstRW<[WLat3, LSU, GroupAlone], (instregex "SFPC$")>; +def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LFPC$")>; +def : 
InstRW<[WLat30, MCD], (instregex "SFASR$")>; +def : InstRW<[WLat30, MCD], (instregex "LFAS$")>; +def : InstRW<[WLat3, FXb, GroupAlone], (instregex "SRNM(B|T)?$")>; + + +// --------------------- Hexadecimal floating point ------------------------- // + +//===----------------------------------------------------------------------===// +// HFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "(LEDR|LRER)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEXR$")>; +def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LDE$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LDER$")>; +def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>; + +// Convert from fixed +def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>; +def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "THD(E)?R$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)R$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)R$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "A(E|D|U|W)$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "S(E|D|U|W)$")>; +def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "M(D|DE|E|EE)$")>; +def 
: InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|E|EE)R$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MXD$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXR$")>; +def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], (instregex "MY$")>; +def : InstRW<[WLat6LSU, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MY(H|L)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MYR$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MY(H|L)R$")>; + +// Multiply and add / subtract +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "M(A|S)(E|D)$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)(E|D)R$")>; +def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF4, LSU, GroupAlone], + (instregex "MAY$")>; +def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone], + (instregex "MAY(H|L)$")>; +def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MAYR$")>; +def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MAY(H|L)R$")>; + +// Division +def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "D(E|D)$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)R$")>; +def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr], + (instregex "C(E|D)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "C(E|D)R$")>; +def : InstRW<[WLat10, VecDF2, GroupAlone], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "LTDTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[WLat15, VecDF, NormalGr], (instregex "LEDTR$")>; +def : InstRW<[WLat15, VecDF2, NormalGr], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "LDETR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDFTR(A)?$")>; +def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDGTR(A)?$")>; +def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXFTR(A)?$")>; +def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXGTR(A)?$")>; +def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDLFTR$")>; +def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDLGTR$")>; +def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXLFTR$")>; +def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXLGTR$")>; + +// Convert to fixed / logical +def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], + (instregex "C(F|G)DTR(A)?$")>; +def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], + (instregex "C(F|G)XTR(A)?$")>; +def 
: InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], (instregex "CL(F|G)DTR$")>; +def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>; + +// Convert from / to signed / unsigned packed +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>; +def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>; +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>; +def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>; + +// Convert from / to zoned +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>; + +// Convert from / to packed +def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>; +def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>; +def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>; + +// Perform floating-point operation +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "FIDTR$")>; +def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEDTR$")>; +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEXTR$")>; + +// Extract significance +def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "ESDTR$")>; +def : InstRW<[WLat12, FXb, VecDF2, Cracked], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "ADTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXTR(A)?$")>; + +// Subtraction +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "SDTR(A)?$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXTR(A)?$")>; + +// Multiply +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "MDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[WLat30, VecDF, NormalGr], (instregex "DDTR(A)?$")>; +def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "QADTR$")>; +def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>; +def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>; +def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>; +def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons 
+//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "(K|C)DTR$")>; +def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEDTR$")>; +def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[WLat15, LSU, VecDF, NormalGr], (instregex "TD(C|G)(E|D)T$")>; +def : InstRW<[WLat15, LSU, VecDF2, GroupAlone], (instregex "TD(C|G)XT$")>; + + +// --------------------------------- Vector --------------------------------- // + +//===----------------------------------------------------------------------===// +// Vector: Move instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLR(32|64)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLGV(B|F|G|H)?$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLVG(B|F|G|H)?$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLVGP(32)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Immediate instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VZERO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VONE$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGBM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREPI(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Loads +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "VLE(B|F|G|H)$")>; +def : InstRW<[WLat5LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked], + (instregex "VGE(F|G)$")>; +def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], + (instregex "VLM(Align)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Stores +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>; +def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>; +def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>; +def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Byte swaps +//===----------------------------------------------------------------------===// + +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBR(H|F|G|Q)?$")>; +def : 
InstRW<[LSULatency, LSU, NormalGr], (instregex "VLER(H|F|G)?$")>; +def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr], + (instregex "VLEBR(H|F|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEBRZ(H|F|G|E)?$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBRREP(H|F|G)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTBR(H|F|G|Q)?$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTER(H|F|G)?$")>; +def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTEBRH$")>; +def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTEBR(F|G)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Selects and permutes +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRH(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPERM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPDI$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VBPERM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREP(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEL$")>; + +//===----------------------------------------------------------------------===// +// Vector: Widening and narrowing +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPK(F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEG(B|F|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPH(B|F|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPL(B|F)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLH(B|F|H|W)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLL(B|F|H)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVG(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVGL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VN(C|O|N|X)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VO(C)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VCKSM$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLZ(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCTZ(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VX$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFMA(B|F|G|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM(B|F|G|H)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLC(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLP(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], 
(instregex "VMX(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMXL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMN(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMNL(B|F|G|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAL(B|F)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALH(B|F|H|W)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAH(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VME(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMH(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VML(B|F)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLE(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLH(B|F|H|W)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLO(B|F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMO(B|F|H)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VMSL(G)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPOPCT(B|F|G|H)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERIM(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESLV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRA(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRAV(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRLV(B|F|G|H)?$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSL(DB)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)B$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLD$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSRD$")>; + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSCBI(B|F|G|H|Q)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VS(F|G|H|Q)?$")>; + +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUM(B|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMG(F|H)?$")>; +def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMQ(F|G)?$")>; + +//===----------------------------------------------------------------------===// +// Vector: Integer comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VEC(B|F|G|H)?$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VECL(B|F|G|H)?$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex 
"VCH(B|F|G|H)S$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)?$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)S$")>; +def : InstRW<[WLat4, VecStr, NormalGr], (instregex "VTM$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point arithmetic +//===----------------------------------------------------------------------===// + +// Conversion and rounding +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCFP(S|L)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?G$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?GB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCD(L)?GB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCE(L)?FB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCE(L)?FB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(S|L)FP$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GD$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?GDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?FEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?FEB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WL(DE|ED)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(L|R)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(LS|RD)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFL(LS|RD)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFLLD$")>; +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFLRX$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFI(DB)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFIDB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFISB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFISB$")>; +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFIXB$")>; + +// Sign operations +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VFPSO$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSODB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSOSB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFPSOXB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFL(C|N|P)XB$")>; + +// Minimum / maximum +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)SB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WF(MAX|MIN)XB$")>; + +// Test data class +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFTCI$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCIDB$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCISB$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFTCIXB$")>; + +// Add / subtract +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], 
(instregex "VF(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)SB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)SB$")>; +def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WF(A|S)XB$")>; + +// Multiply / multiply-and-add/subtract +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFM(DB)?$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFM(D|S)B$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFMSB$")>; +def : InstRW<[WLat20, VecDF2, NormalGr], (instregex "WFMXB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)DB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)SB$")>; +def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)SB$")>; +def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "WF(N)?M(A|S)XB$")>; + +// Divide / square root +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFD$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDDB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFDXB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQ$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQDB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQSB$")>; +def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFSQXB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point comparison +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)DB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XB$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XB$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFC(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFK(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], + (instregex "WF(C|K)(E|H|HE)DBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], + (instregex "VF(C|K)(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SBS$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XBS$")>; +def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XBS$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)DB$")>; +def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)SB$")>; +def : InstRW<[WLat3, VecDFX, NormalGr], (instregex "WF(C|K)XB$")>; + +//===----------------------------------------------------------------------===// +// Vector: Floating-point insertion and extraction 
+//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>; +def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>; + +//===----------------------------------------------------------------------===// +// Vector: String instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(B)?$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAE(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], + (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VISTR(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VISTR(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRC(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRC(B|F|H)S$")>; +def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRS(B|F|H)?$")>; +def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRSZ(B|F|H)$")>; + +//===----------------------------------------------------------------------===// +// Vector: Packed-decimal instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "VLIP$")>; +def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>; +def : InstRW<[WLat1, VecDFX, FXb, LSU2, GroupAlone2], (instregex "VUPKZ$")>; +def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone], + (instregex "VCVB(G)?(Opt)?$")>; +def : InstRW<[WLat15, WLat15, VecDF2, FXb, GroupAlone], + (instregex "VCVD(G)?$")>; +def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "V(A|S)P$")>; +def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VM(S)?P$")>; +def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "V(D|R)P$")>; +def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VSDP$")>; +def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRP$")>; +def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "VPSOP$")>; +def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)P$")>; + + +// -------------------------------- System ---------------------------------- // + +//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>; +def : InstRW<[WLat20, GroupAlone3], (instregex "LPSW(E)?$")>; +def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; +def : InstRW<[WLat3, FXa, NormalGr], (instregex 
"IAC$")>; +def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>; +def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>; +def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>; +def : InstRW<[WLat30, MCD], (instregex "ESEA$")>; + +//===----------------------------------------------------------------------===// +// System: Prefix-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>; + +//===----------------------------------------------------------------------===// +// System: Storage-Key and Real Memory Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "ISKE$")>; +def : InstRW<[WLat30, MCD], (instregex "IVSK$")>; +def : InstRW<[WLat30, MCD], (instregex "SSKE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RRB(E|M)$")>; +def : InstRW<[WLat30, MCD], (instregex "IRBM$")>; +def : InstRW<[WLat30, MCD], (instregex "PFMF$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "TB$")>; +def : InstRW<[WLat30, MCD], (instregex "PGIN$")>; +def : InstRW<[WLat30, MCD], (instregex "PGOUT$")>; + +//===----------------------------------------------------------------------===// +// System: Dynamic-Address-Translation Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "IPTE(Opt)?(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "IDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "CRDTE(Opt)?$")>; +def : InstRW<[WLat30, MCD], (instregex "PTLB$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "CSP(G)?$")>; +def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "LPTEA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "LRA(Y|G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STRAG$")>; +def : InstRW<[WLat30, MCD], (instregex "LURA(G)?$")>; +def : InstRW<[WLat30, MCD], (instregex "STUR(A|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPROT$")>; + +//===----------------------------------------------------------------------===// +// System: Memory-move Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>; +def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>; +def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>; +def : InstRW<[WLat30, MCD], (instregex "MVPG$")>; + +//===----------------------------------------------------------------------===// +// System: Address-Space Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "LASP$")>; +def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PALB$")>; +def : InstRW<[WLat30, MCD], (instregex "PC$")>; +def : InstRW<[WLat30, MCD], (instregex "PR$")>; +def : InstRW<[WLat30, MCD], (instregex "PT(I)?$")>; +def : InstRW<[WLat30, MCD], (instregex "RP$")>; +def : InstRW<[WLat30, MCD], (instregex "BS(G|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "TAR$")>; + 
+//===----------------------------------------------------------------------===// +// System: Linkage-Stack Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "BAKR$")>; +def : InstRW<[WLat30, MCD], (instregex "EREG(G)?$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>; + +//===----------------------------------------------------------------------===// +// System: Time-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "PTFF$")>; +def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>; +def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>; +def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>; +def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>; +def : InstRW<[WLat30, MCD], (instregex "STCKC$")>; +def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "STAP$")>; +def : InstRW<[WLat30, MCD], (instregex "STIDP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STSI$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "STFL(E)?$")>; +def : InstRW<[WLat30, MCD], (instregex "ECAG$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECTG$")>; +def : InstRW<[WLat30, MCD], (instregex "PTF$")>; +def : InstRW<[WLat30, MCD], (instregex "PCKMO$")>; + +//===----------------------------------------------------------------------===// +// System: Miscellaneous Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "SVC$")>; +def : InstRW<[WLat1, FXb, GroupAlone], (instregex "MC$")>; +def : InstRW<[WLat30, MCD], (instregex "DIAG$")>; +def : InstRW<[WLat1, FXb, NormalGr], (instregex "TRAC(E|G)$")>; +def : InstRW<[WLat30, MCD], (instregex "TRAP(2|4)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIG(P|A)$")>; +def : InstRW<[WLat30, MCD], (instregex "SIE$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Measurement Facility Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat1, FXb, NormalGr], (instregex "LPP$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "ECPGA$")>; +def : InstRW<[WLat30, WLat30, MCD], (instregex "E(C|P)CTR$")>; +def : InstRW<[WLat30, MCD], (instregex "LCCTL$")>; +def : InstRW<[WLat30, MCD], (instregex "L(P|S)CTL$")>; +def : InstRW<[WLat30, MCD], (instregex "Q(S|CTR)I$")>; +def : InstRW<[WLat30, MCD], (instregex "S(C|P)CTR$")>; + +//===----------------------------------------------------------------------===// +// System: I/O Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[WLat30, MCD], (instregex "(C|H|R|X)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "(M|S|ST|T)SCH$")>; +def : InstRW<[WLat30, MCD], (instregex "RCHP$")>; +def : InstRW<[WLat30, MCD], (instregex "SCHM$")>; +def : InstRW<[WLat30, MCD], (instregex "STC(PS|RW)$")>; +def : InstRW<[WLat30, MCD], (instregex "TPI$")>; +def : InstRW<[WLat30, MCD], (instregex "SAL$")>; + +} + diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp 
b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp index b3238b3da5c..e79dfc5b4b9 100644 --- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -46,6 +46,7 @@ private: bool shortenOn001(MachineInstr &MI, unsigned Opcode); bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode); bool shortenFPConv(MachineInstr &MI, unsigned Opcode); + bool shortenSelect(MachineInstr &MI, unsigned Opcode); const SystemZInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -175,6 +176,23 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) { return false; } +// MI is a three-operand select instruction. If one of the sources matches +// the destination, convert to the equivalent load-on-condition. +bool SystemZShortenInst::shortenSelect(MachineInstr &MI, unsigned Opcode) { + if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) { + MI.setDesc(TII->get(Opcode)); + MI.tieOperands(0, 1); + return true; + } + if (MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) { + TII->commuteInstruction(MI, false, 1, 2); + MI.setDesc(TII->get(Opcode)); + MI.tieOperands(0, 1); + return true; + } + return false; +} + // Process all instructions in MBB. Return true if something changed. bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { bool Changed = false; @@ -195,6 +213,18 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { Changed |= shortenIIF(MI, SystemZ::LLIHL, SystemZ::LLIHH); break; + case SystemZ::SELR: + Changed |= shortenSelect(MI, SystemZ::LOCR); + break; + + case SystemZ::SELFHR: + Changed |= shortenSelect(MI, SystemZ::LOCFHR); + break; + + case SystemZ::SELGR: + Changed |= shortenSelect(MI, SystemZ::LOCGR); + break; + case SystemZ::WFADB: Changed |= shortenOn001AddCC(MI, SystemZ::ADBR); break; diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp index 127d9fb99cd..5e8af81842c 100644 --- a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -55,6 +55,9 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU, HasMessageSecurityAssist7(false), HasMessageSecurityAssist8(false), HasVectorEnhancements1(false), HasVectorPackedDecimal(false), HasInsertReferenceBitsMultiple(false), + HasMiscellaneousExtensions3(false), HasMessageSecurityAssist9(false), + HasVectorEnhancements2(false), HasVectorPackedDecimalEnhancement(false), + HasEnhancedSort(false), HasDeflateConversion(false), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), TSInfo(), FrameLowering() {} diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/llvm/lib/Target/SystemZ/SystemZSubtarget.h index cb9425aea87..fa3f65d93c9 100644 --- a/llvm/lib/Target/SystemZ/SystemZSubtarget.h +++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.h @@ -62,6 +62,12 @@ protected: bool HasVectorEnhancements1; bool HasVectorPackedDecimal; bool HasInsertReferenceBitsMultiple; + bool HasMiscellaneousExtensions3; + bool HasMessageSecurityAssist9; + bool HasVectorEnhancements2; + bool HasVectorPackedDecimalEnhancement; + bool HasEnhancedSort; + bool HasDeflateConversion; private: Triple TargetTriple; @@ -209,6 +215,30 @@ public: return HasInsertReferenceBitsMultiple; } + // Return true if the target has the miscellaneous-extensions facility 3. + bool hasMiscellaneousExtensions3() const { + return HasMiscellaneousExtensions3; + } + + // Return true if the target has the message-security-assist + // extension facility 9.
+ bool hasMessageSecurityAssist9() const { return HasMessageSecurityAssist9; } + + // Return true if the target has the vector-enhancements facility 2. + bool hasVectorEnhancements2() const { return HasVectorEnhancements2; } + + // Return true if the target has the vector-packed-decimal + // enhancement facility. + bool hasVectorPackedDecimalEnhancement() const { + return HasVectorPackedDecimalEnhancement; + } + + // Return true if the target has the enhanced-sort facility. + bool hasEnhancedSort() const { return HasEnhancedSort; } + + // Return true if the target has the deflate-conversion facility. + bool hasDeflateConversion() const { return HasDeflateConversion; } + // Return true if GV can be accessed using LARL for reloc model RM // and code model CM. bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const; diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 304ee79d2b2..145cf87ef9f 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -466,6 +466,27 @@ int SystemZTTIImpl::getArithmeticInstrCost( if (Opcode == Instruction::FRem) return LIBCALL_COST; + // Give discount for some combined logical operations if supported. + if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) { + if (Opcode == Instruction::Xor) { + for (const Value *A : Args) { + if (const Instruction *I = dyn_cast<Instruction>(A)) + if (I->hasOneUse() && + (I->getOpcode() == Instruction::And || + I->getOpcode() == Instruction::Or || + I->getOpcode() == Instruction::Xor)) + return 0; + } + } + else if (Opcode == Instruction::Or || Opcode == Instruction::And) { + for (const Value *A : Args) { + if (const Instruction *I = dyn_cast<Instruction>(A)) + if (I->hasOneUse() && I->getOpcode() == Instruction::Xor) + return 0; + } + } + } + // Or requires one instruction, although it has custom handling for i64. if (Opcode == Instruction::Or) return 1; @@ -686,9 +707,9 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, // TODO: Fix base implementation which could simplify things a bit here // (seems to miss on differentiating on scalar/vector types). - // Only 64 bit vector conversions are natively supported. - if (DstScalarBits == 64) { - if (SrcScalarBits == 64) + // Only 64 bit vector conversions are natively supported before arch13. + if (DstScalarBits == 64 || ST->hasVectorEnhancements2()) { + if (SrcScalarBits == DstScalarBits) return NumDstVectors; if (SrcScalarBits == 1) @@ -856,7 +877,7 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, case Instruction::Select: if (ValTy->isFloatingPointTy()) return 4; // No load on condition for FP - costs a conditional jump. - return 1; // Load On Condition. + return 1; // Load On Condition / Select Register. } } @@ -1009,7 +1030,8 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, (Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src)); // Store/Load reversed saves one instruction. - if (!Src->isVectorTy() && NumOps == 1 && I != nullptr) { + if (((!Src->isVectorTy() && NumOps == 1) || ST->hasVectorEnhancements2()) && + I != nullptr) { if (Opcode == Instruction::Load && I->hasOneUse()) { const Instruction *LdUser = cast<Instruction>(*I->user_begin()); // In case of load -> bswap -> store, return normal cost for the load. 
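A note on the SystemZTargetTransformInfo.cpp hunk above: the zero-cost discount models the combined logical instructions added by the miscellaneous-extensions facility 3 (register nand/nor/xnor and and/or-with-complement forms, e.g. NNRK, NORK, NXRK, NCRK, OCRK), which let a single-use xor that feeds or is fed by another logical operation fold into one instruction. Below is a minimal standalone C++ sketch of that pairing rule, with illustrative names that are not LLVM's API; the real hook inspects the IR operands as shown in the hunk.

#include <cassert>

enum class LogicOp { And, Or, Xor };

// Estimated cost of an outer logical instruction whose single-use operand is
// produced by an inner logical instruction. With the miscellaneous-extensions
// facility 3, an xor over an and/or/xor (nand, nor, xnor), or an and/or over
// an xor (and/or with complement), collapses into one combined instruction,
// so the outer operation is modeled as free.
int outerLogicOpCost(LogicOp Outer, LogicOp Inner, bool HasMiscExt3) {
  if (HasMiscExt3) {
    if (Outer == LogicOp::Xor)
      return 0; // xor of an and/or/xor result: nand, nor, xnor.
    if (Inner == LogicOp::Xor)
      return 0; // and/or of an xor result: and/or with complement.
  }
  return 1; // Otherwise the outer operation still costs one instruction.
}

int main() {
  // (a & b) ^ -1 is a single nand on arch13, so the xor is free...
  assert(outerLogicOpCost(LogicOp::Xor, LogicOp::And, true) == 0);
  // ...but it still needs a separate instruction on z13.
  assert(outerLogicOpCost(LogicOp::Xor, LogicOp::And, false) == 1);
  // a | (b ^ -1) becomes or-with-complement on arch13.
  assert(outerLogicOpCost(LogicOp::Or, LogicOp::Xor, true) == 0);
  return 0;
}

The new logic-miscext3.ll test further below exercises exactly these patterns: the fused and/or/xor is expected to cost 0 under -mcpu=arch13 and 1 under -mcpu=z13.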
diff --git a/llvm/test/Analysis/CostModel/SystemZ/fp-cast.ll b/llvm/test/Analysis/CostModel/SystemZ/fp-cast.ll index cbad825e486..9a43ca31838 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/fp-cast.ll +++ b/llvm/test/Analysis/CostModel/SystemZ/fp-cast.ll @@ -1,4 +1,7 @@ -; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \ +; RUN: | FileCheck %s -check-prefixes=CHECK,Z13 +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=arch13 \ +; RUN: | FileCheck %s -check-prefixes=CHECK,AR13 ; ; Note: The scalarized vector instructions costs are not including any ; extracts, due to the undef operands. @@ -114,7 +117,8 @@ define void @fptosi() { ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v18 = fptosi <2 x double> undef to <2 x i16> ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v19 = fptosi <2 x double> undef to <2 x i8> ; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v20 = fptosi <2 x float> undef to <2 x i64> -; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptosi <2 x float> undef to <2 x i32> +; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptosi <2 x float> undef to <2 x i32> +; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v21 = fptosi <2 x float> undef to <2 x i32> ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v22 = fptosi <2 x float> undef to <2 x i16> ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v23 = fptosi <2 x float> undef to <2 x i8> ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v24 = fptosi <4 x fp128> undef to <4 x i64> @@ -126,7 +130,8 @@ define void @fptosi() { ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v30 = fptosi <4 x double> undef to <4 x i16> ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v31 = fptosi <4 x double> undef to <4 x i8> ; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v32 = fptosi <4 x float> undef to <4 x i64> -; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptosi <4 x float> undef to <4 x i32> +; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptosi <4 x float> undef to <4 x i32> +; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v33 = fptosi <4 x float> undef to <4 x i32> ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v34 = fptosi <4 x float> undef to <4 x i16> ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v35 = fptosi <4 x float> undef to <4 x i8> ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v36 = fptosi <8 x fp128> undef to <8 x i64> @@ -138,7 +143,8 @@ define void @fptosi() { ; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v42 = fptosi <8 x double> undef to <8 x i16> ; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v43 = fptosi <8 x double> undef to <8 x i8> ; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %v44 = fptosi <8 x float> undef to <8 x i64> -; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptosi <8 x float> undef to <8 x i32> +; Z13: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptosi <8 x float> undef to <8 x i32> +; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v45 = fptosi <8 x float> undef to <8 
x i32> ; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v46 = fptosi <8 x float> undef to <8 x i16> ; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v47 = fptosi <8 x float> undef to <8 x i8> ; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = fptosi <16 x double> undef to <16 x i64> @@ -146,7 +152,8 @@ define void @fptosi() { ; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v50 = fptosi <16 x double> undef to <16 x i16> ; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v51 = fptosi <16 x double> undef to <16 x i8> ; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %v52 = fptosi <16 x float> undef to <16 x i64> -; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptosi <16 x float> undef to <16 x i32> +; Z13: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptosi <16 x float> undef to <16 x i32> +; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v53 = fptosi <16 x float> undef to <16 x i32> ; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v54 = fptosi <16 x float> undef to <16 x i16> ; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v55 = fptosi <16 x float> undef to <16 x i8> @@ -233,7 +240,8 @@ define void @fptoui() { ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v18 = fptoui <2 x double> undef to <2 x i16> ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v19 = fptoui <2 x double> undef to <2 x i8> ; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v20 = fptoui <2 x float> undef to <2 x i64> -; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptoui <2 x float> undef to <2 x i32> +; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptoui <2 x float> undef to <2 x i32> +; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v21 = fptoui <2 x float> undef to <2 x i32> ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v22 = fptoui <2 x float> undef to <2 x i16> ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v23 = fptoui <2 x float> undef to <2 x i8> ; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v24 = fptoui <4 x fp128> undef to <4 x i64> @@ -245,7 +253,8 @@ define void @fptoui() { ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v30 = fptoui <4 x double> undef to <4 x i16> ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v31 = fptoui <4 x double> undef to <4 x i8> ; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v32 = fptoui <4 x float> undef to <4 x i64> -; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptoui <4 x float> undef to <4 x i32> +; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptoui <4 x float> undef to <4 x i32> +; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v33 = fptoui <4 x float> undef to <4 x i32> ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v34 = fptoui <4 x float> undef to <4 x i16> ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v35 = fptoui <4 x float> undef to <4 x i8> ; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v36 = fptoui <8 x fp128> undef to <8 x i64> @@ -257,7 +266,8 @@ define void @fptoui() { ; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v42 = fptoui <8 x 
double> undef to <8 x i16> ; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v43 = fptoui <8 x double> undef to <8 x i8> ; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %v44 = fptoui <8 x float> undef to <8 x i64> -; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptoui <8 x float> undef to <8 x i32> +; Z13: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptoui <8 x float> undef to <8 x i32> +; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v45 = fptoui <8 x float> undef to <8 x i32> ; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v46 = fptoui <8 x float> undef to <8 x i16> ; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v47 = fptoui <8 x float> undef to <8 x i8> ; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = fptoui <16 x double> undef to <16 x i64> @@ -265,7 +275,8 @@ define void @fptoui() { ; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v50 = fptoui <16 x double> undef to <16 x i16> ; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v51 = fptoui <16 x double> undef to <16 x i8> ; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %v52 = fptoui <16 x float> undef to <16 x i64> -; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptoui <16 x float> undef to <16 x i32> +; Z13: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptoui <16 x float> undef to <16 x i32> +; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v53 = fptoui <16 x float> undef to <16 x i32> ; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v54 = fptoui <16 x float> undef to <16 x i16> ; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v55 = fptoui <16 x float> undef to <16 x i8> @@ -379,7 +390,8 @@ define void @sitofp() { ; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v14 = sitofp <2 x i64> undef to <2 x float> ; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v15 = sitofp <2 x i32> undef to <2 x fp128> ; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v16 = sitofp <2 x i32> undef to <2 x double> -; CHECK: Cost Model: Found an estimated cost of 14 for instruction: %v17 = sitofp <2 x i32> undef to <2 x float> +; Z13: Cost Model: Found an estimated cost of 14 for instruction: %v17 = sitofp <2 x i32> undef to <2 x float> +; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v17 = sitofp <2 x i32> undef to <2 x float> ; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v18 = sitofp <2 x i16> undef to <2 x fp128> ; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v19 = sitofp <2 x i16> undef to <2 x double> ; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v20 = sitofp <2 x i16> undef to <2 x float> @@ -391,7 +403,8 @@ define void @sitofp() { ; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v26 = sitofp <4 x i64> undef to <4 x float> ; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v27 = sitofp <4 x i32> undef to <4 x fp128> ; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v28 = sitofp <4 x i32> undef to <4 x double> -; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v29 = sitofp <4 x i32> undef to <4 x float> +; Z13: Cost Model: Found an estimated cost of 13 for instruction: %v29 = sitofp <4 x i32> undef to <4 x 
float> +; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v29 = sitofp <4 x i32> undef to <4 x float> ; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v30 = sitofp <4 x i16> undef to <4 x fp128> ; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v31 = sitofp <4 x i16> undef to <4 x double> ; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v32 = sitofp <4 x i16> undef to <4 x float> @@ -403,7 +416,8 @@ define void @sitofp() { ; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v38 = sitofp <8 x i64> undef to <8 x float> ; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v39 = sitofp <8 x i32> undef to <8 x fp128> ; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v40 = sitofp <8 x i32> undef to <8 x double> -; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v41 = sitofp <8 x i32> undef to <8 x float> +; Z13: Cost Model: Found an estimated cost of 25 for instruction: %v41 = sitofp <8 x i32> undef to <8 x float> +; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v41 = sitofp <8 x i32> undef to <8 x float> ; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v42 = sitofp <8 x i16> undef to <8 x fp128> ; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v43 = sitofp <8 x i16> undef to <8 x double> ; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v44 = sitofp <8 x i16> undef to <8 x float> @@ -413,7 +427,8 @@ define void @sitofp() { ; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = sitofp <16 x i64> undef to <16 x double> ; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v49 = sitofp <16 x i64> undef to <16 x float> ; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v50 = sitofp <16 x i32> undef to <16 x double> -; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v51 = sitofp <16 x i32> undef to <16 x float> +; Z13: Cost Model: Found an estimated cost of 49 for instruction: %v51 = sitofp <16 x i32> undef to <16 x float> +; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v51 = sitofp <16 x i32> undef to <16 x float> ; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v52 = sitofp <16 x i16> undef to <16 x double> ; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v53 = sitofp <16 x i16> undef to <16 x float> ; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v54 = sitofp <16 x i8> undef to <16 x double> @@ -497,7 +512,8 @@ define void @uitofp() { ; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v14 = uitofp <2 x i64> undef to <2 x float> ; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v15 = uitofp <2 x i32> undef to <2 x fp128> ; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v16 = uitofp <2 x i32> undef to <2 x double> -; CHECK: Cost Model: Found an estimated cost of 14 for instruction: %v17 = uitofp <2 x i32> undef to <2 x float> +; Z13: Cost Model: Found an estimated cost of 14 for instruction: %v17 = uitofp <2 x i32> undef to <2 x float> +; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v17 = uitofp <2 x i32> undef to <2 x float> ; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v18 = uitofp <2 x i16> undef to <2 x fp128> ; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v19 = uitofp <2 x i16> undef to <2 x double> ; CHECK: 
Cost Model: Found an estimated cost of 9 for instruction: %v20 = uitofp <2 x i16> undef to <2 x float> @@ -509,7 +525,8 @@ define void @uitofp() { ; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v26 = uitofp <4 x i64> undef to <4 x float> ; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v27 = uitofp <4 x i32> undef to <4 x fp128> ; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v28 = uitofp <4 x i32> undef to <4 x double> -; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v29 = uitofp <4 x i32> undef to <4 x float> +; Z13: Cost Model: Found an estimated cost of 13 for instruction: %v29 = uitofp <4 x i32> undef to <4 x float> +; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v29 = uitofp <4 x i32> undef to <4 x float> ; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v30 = uitofp <4 x i16> undef to <4 x fp128> ; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v31 = uitofp <4 x i16> undef to <4 x double> ; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v32 = uitofp <4 x i16> undef to <4 x float> @@ -521,7 +538,8 @@ define void @uitofp() { ; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v38 = uitofp <8 x i64> undef to <8 x float> ; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v39 = uitofp <8 x i32> undef to <8 x fp128> ; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v40 = uitofp <8 x i32> undef to <8 x double> -; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v41 = uitofp <8 x i32> undef to <8 x float> +; Z13: Cost Model: Found an estimated cost of 25 for instruction: %v41 = uitofp <8 x i32> undef to <8 x float> +; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v41 = uitofp <8 x i32> undef to <8 x float> ; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v42 = uitofp <8 x i16> undef to <8 x fp128> ; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v43 = uitofp <8 x i16> undef to <8 x double> ; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v44 = uitofp <8 x i16> undef to <8 x float> @@ -531,7 +549,8 @@ define void @uitofp() { ; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = uitofp <16 x i64> undef to <16 x double> ; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v49 = uitofp <16 x i64> undef to <16 x float> ; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v50 = uitofp <16 x i32> undef to <16 x double> -; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v51 = uitofp <16 x i32> undef to <16 x float> +; Z13: Cost Model: Found an estimated cost of 49 for instruction: %v51 = uitofp <16 x i32> undef to <16 x float> +; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v51 = uitofp <16 x i32> undef to <16 x float> ; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v52 = uitofp <16 x i16> undef to <16 x double> ; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v53 = uitofp <16 x i16> undef to <16 x float> ; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v54 = uitofp <16 x i8> undef to <16 x double> diff --git a/llvm/test/Analysis/CostModel/SystemZ/intrinsics.ll b/llvm/test/Analysis/CostModel/SystemZ/intrinsics.ll index f83cf5a7c3d..bbde627838b 100644 --- a/llvm/test/Analysis/CostModel/SystemZ/intrinsics.ll +++ 
b/llvm/test/Analysis/CostModel/SystemZ/intrinsics.ll @@ -1,4 +1,7 @@ -; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \ +; RUN: | FileCheck %s -check-prefixes=CHECK,Z13 +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=arch13 \ +; RUN: | FileCheck %s -check-prefixes=CHECK,AR13 define void @bswap_i64(i64 %arg, <2 x i64> %arg2) { ; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i64': @@ -63,6 +66,32 @@ define void @bswap_i64_mem(i64* %src, i64 %arg, i64* %dst) { ret void } +define void @bswap_v2i64_mem(<2 x i64>* %src, <2 x i64> %arg, <2 x i64>* %dst) { +; CHECK:Printing analysis 'Cost Model Analysis' for function 'bswap_v2i64_mem': +; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <2 x i64>, <2 x i64>* %src +; AR13: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <2 x i64>, <2 x i64>* %src +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld1) +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %arg) +; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %swp2, <2 x i64>* %dst +; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp2, <2 x i64>* %dst +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <2 x i64>, <2 x i64>* %src +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld2) +; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %swp3, <2 x i64>* %dst +; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp3, <2 x i64>* %dst + + %Ld1 = load <2 x i64>, <2 x i64>* %src + %swp1 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld1) + + %swp2 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %arg) + store <2 x i64> %swp2, <2 x i64>* %dst + + %Ld2 = load <2 x i64>, <2 x i64>* %src + %swp3 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld2) + store <2 x i64> %swp3, <2 x i64>* %dst + + ret void +} + define void @bswap_i32_mem(i32* %src, i32 %arg, i32* %dst) { ; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i32_mem': ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i32, i32* %src @@ -85,6 +114,31 @@ define void @bswap_i32_mem(i32* %src, i32 %arg, i32* %dst) { ret void } +define void @bswap_v4i32_mem(<4 x i32>* %src, <4 x i32> %arg, <4 x i32>* %dst) { +; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_v4i32_mem': +; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <4 x i32>, <4 x i32>* %src +; AR13: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <4 x i32>, <4 x i32>* %src +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld1) +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %arg) +; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %swp2, <4 x i32>* %dst +; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp2, <4 x i32>* %dst +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <4 
x i32>, <4 x i32>* %src +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld2) +; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %swp3, <4 x i32>* %dst +; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp3, <4 x i32>* %dst + %Ld1 = load <4 x i32>, <4 x i32>* %src + %swp1 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld1) + + %swp2 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %arg) + store <4 x i32> %swp2, <4 x i32>* %dst + + %Ld2 = load <4 x i32>, <4 x i32>* %src + %swp3 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld2) + store <4 x i32> %swp3, <4 x i32>* %dst + + ret void +} + define void @bswap_i16_mem(i16* %src, i16 %arg, i16* %dst) { ; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i16_mem': ; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i16, i16* %src @@ -107,6 +161,30 @@ define void @bswap_i16_mem(i16* %src, i16 %arg, i16* %dst) { ret void } +define void @bswap_v8i16_mem(<8 x i16>* %src, <8 x i16> %arg, <8 x i16>* %dst) { +; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_v8i16_mem': +; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <8 x i16>, <8 x i16>* %src +; AR13: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <8 x i16>, <8 x i16>* %src +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld1) +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %arg) +; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %swp2, <8 x i16>* %dst +; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp2, <8 x i16>* %dst +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <8 x i16>, <8 x i16>* %src +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld2) +; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %swp3, <8 x i16>* %dst +; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp3, <8 x i16>* %dst + %Ld1 = load <8 x i16>, <8 x i16>* %src + %swp1 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld1) + + %swp2 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %arg) + store <8 x i16> %swp2, <8 x i16>* %dst + + %Ld2 = load <8 x i16>, <8 x i16>* %src + %swp3 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld2) + store <8 x i16> %swp3, <8 x i16>* %dst + + ret void +} declare i64 @llvm.bswap.i64(i64) declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) diff --git a/llvm/test/Analysis/CostModel/SystemZ/logic-miscext3.ll b/llvm/test/Analysis/CostModel/SystemZ/logic-miscext3.ll new file mode 100644 index 00000000000..86706309e6d --- /dev/null +++ b/llvm/test/Analysis/CostModel/SystemZ/logic-miscext3.ll @@ -0,0 +1,97 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \ +; RUN: | FileCheck %s -check-prefixes=CHECK,Z13 +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=arch13 \ +; RUN: | FileCheck %s -check-prefixes=CHECK,AR13 + +define void @fun0(i32 %a) { +; CHECK-LABEL: Printing analysis 'Cost Model Analysis' for function 'fun0': +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c0 = xor i32 %l0, -1 +; Z13: Cost
Model: Found an estimated cost of 1 for instruction: %res0 = or i32 %a, %c0 +; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res0 = or i32 %a, %c0 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c1 = xor i32 %l1, -1 +; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res1 = and i32 %a, %c1 +; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res1 = and i32 %a, %c1 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c2 = and i32 %l2, %a +; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res2 = xor i32 %c2, -1 +; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res2 = xor i32 %c2, -1 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c3 = or i32 %l3, %a +; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res3 = xor i32 %c3, -1 +; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res3 = xor i32 %c3, -1 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c4 = xor i32 %l4, %a +; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res4 = xor i32 %c4, -1 +; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res4 = xor i32 %c4, -1 + +entry: + %l0 = load i32, i32* undef + %c0 = xor i32 %l0, -1 + %res0 = or i32 %a, %c0 + store i32 %res0, i32* undef + + %l1 = load i32, i32* undef + %c1 = xor i32 %l1, -1 + %res1 = and i32 %a, %c1 + store i32 %res1, i32* undef + + %l2 = load i32, i32* undef + %c2 = and i32 %l2, %a + %res2 = xor i32 %c2, -1 + store i32 %res2, i32* undef + + %l3 = load i32, i32* undef + %c3 = or i32 %l3, %a + %res3 = xor i32 %c3, -1 + store i32 %res3, i32* undef + + %l4 = load i32, i32* undef + %c4 = xor i32 %l4, %a + %res4 = xor i32 %c4, -1 + store i32 %res4, i32* undef + + ret void +} + +define void @fun1(i64 %a) { +; CHECK-LABEL: Printing analysis 'Cost Model Analysis' for function 'fun1': +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c0 = xor i64 %l0, -1 +; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res0 = or i64 %a, %c0 +; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res0 = or i64 %a, %c0 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c1 = xor i64 %l1, -1 +; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res1 = and i64 %a, %c1 +; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res1 = and i64 %a, %c1 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c2 = and i64 %l2, %a +; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res2 = xor i64 %c2, -1 +; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res2 = xor i64 %c2, -1 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c3 = or i64 %l3, %a +; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res3 = xor i64 %c3, -1 +; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res3 = xor i64 %c3, -1 +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c4 = xor i64 %l4, %a +; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res4 = xor i64 %c4, -1 +; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res4 = xor i64 %c4, -1 +entry: + %l0 = load i64, i64* undef + %c0 = xor i64 %l0, -1 + %res0 = or i64 %a, %c0 + store i64 %res0, i64* undef + + %l1 = load i64, i64* undef + %c1 = xor i64 %l1, -1 + %res1 = and i64 %a, %c1 + store i64 %res1, i64* undef + + %l2 = load i64, i64* undef + %c2 = 
and i64 %l2, %a + %res2 = xor i64 %c2, -1 + store i64 %res2, i64* undef + + %l3 = load i64, i64* undef + %c3 = or i64 %l3, %a + %res3 = xor i64 %c3, -1 + store i64 %res3, i64* undef + + %l4 = load i64, i64* undef + %c4 = xor i64 %l4, %a + %res4 = xor i64 %c4, -1 + store i64 %res4, i64* undef + + ret void +} diff --git a/llvm/test/CodeGen/SystemZ/cond-move-01.ll b/llvm/test/CodeGen/SystemZ/cond-move-01.ll index 0be81c3ff80..fad5012be10 100644 --- a/llvm/test/CodeGen/SystemZ/cond-move-01.ll +++ b/llvm/test/CodeGen/SystemZ/cond-move-01.ll @@ -5,6 +5,9 @@ ; Run the test again to make sure it still works the same even ; in the presence of the load-store-on-condition-2 facility. ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s +; +; And again in the presence of the select instructions. +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 -verify-machineinstrs | FileCheck %s ; Test LOCR. define i32 @f1(i32 %a, i32 %b, i32 %limit) { diff --git a/llvm/test/CodeGen/SystemZ/cond-move-02.ll b/llvm/test/CodeGen/SystemZ/cond-move-02.ll index ea0ef98335c..c8ab5331216 100644 --- a/llvm/test/CodeGen/SystemZ/cond-move-02.ll +++ b/llvm/test/CodeGen/SystemZ/cond-move-02.ll @@ -1,6 +1,11 @@ ; Test LOCHI and LOCGHI. ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s +; +; Run the test again to make sure it still works the same even +; in the presence of the select instructions. +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 -verify-machineinstrs | FileCheck %s + define i32 @f1(i32 %x) { ; CHECK-LABEL: f1: diff --git a/llvm/test/CodeGen/SystemZ/cond-move-03.ll b/llvm/test/CodeGen/SystemZ/cond-move-03.ll index a9bf1c80310..0f4d080d6bc 100644 --- a/llvm/test/CodeGen/SystemZ/cond-move-03.ll +++ b/llvm/test/CodeGen/SystemZ/cond-move-03.ll @@ -3,31 +3,36 @@ ; ; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \ ; RUN: -no-integrated-as | FileCheck %s +; +; Run the test again to make sure it still works the same even +; in the presence of the select instructions. +; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=arch13 \ +; RUN: -no-integrated-as | FileCheck %s define void @f1(i32 %limit) { ; CHECK-LABEL: f1: ; CHECK-DAG: stepa [[REG1:%r[0-5]]] ; CHECK-DAG: stepb [[REG2:%r[0-5]]] ; CHECK-DAG: clfi %r2, 42 -; CHECK: locfhrl [[REG2]], [[REG1]] -; CHECK: stepc [[REG2]] +; CHECK: locfhrhe [[REG1]], [[REG2]] +; CHECK: stepc [[REG1]] ; CHECK: br %r14 %a = call i32 asm sideeffect "stepa $0", "=h"() %b = call i32 asm sideeffect "stepb $0", "=h"() %cond = icmp ult i32 %limit, 42 %res = select i1 %cond, i32 %a, i32 %b call void asm sideeffect "stepc $0", "h"(i32 %res) + call void asm sideeffect "use $0", "h"(i32 %b) ret void } -; FIXME: We should commute the LOCRMux to save one move. 
define void @f2(i32 %limit) { ; CHECK-LABEL: f2: ; CHECK-DAG: stepa [[REG1:%r[0-5]]] ; CHECK-DAG: stepb [[REG2:%r[0-5]]] -; CHECK-DAG: clijhe %r2, 42, -; CHECK: risblg [[REG2]], [[REG1]], 0, 159, 32 +; CHECK-DAG: clijl %r2, 42, [[LABEL:.LBB[0-9_]+]] ; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32 +; CHECK: [[LABEL]] ; CHECK: stepc [[REG1]] ; CHECK: br %r14 %dummy = call i32 asm sideeffect "dummy $0", "=h"() @@ -37,16 +42,18 @@ define void @f2(i32 %limit) { %res = select i1 %cond, i32 %a, i32 %b call void asm sideeffect "stepc $0", "h"(i32 %res) call void asm sideeffect "dummy $0", "h"(i32 %dummy) + call void asm sideeffect "use $0", "r"(i32 %b) ret void } define void @f3(i32 %limit) { ; CHECK-LABEL: f3: -; CHECK-DAG: stepa [[REG2:%r[0-5]]] -; CHECK-DAG: stepb [[REG1:%r[0-5]]] -; CHECK-DAG: clijhe %r2, 42, -; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32 -; CHECK: stepc [[REG1]] +; CHECK-DAG: stepa [[REG1:%r[0-5]]] +; CHECK-DAG: stepb [[REG2:%r[0-5]]] +; CHECK-DAG: clijhe %r2, 42, [[LABEL:.LBB[0-9_]+]] +; CHECK: risbhg [[REG2]], [[REG1]], 0, 159, 32 +; CHECK: [[LABEL]] +; CHECK: stepc [[REG2]] ; CHECK: br %r14 %dummy = call i32 asm sideeffect "dummy $0", "=h"() %a = call i32 asm sideeffect "stepa $0", "=r"() @@ -55,17 +62,17 @@ define void @f3(i32 %limit) { %res = select i1 %cond, i32 %a, i32 %b call void asm sideeffect "stepc $0", "h"(i32 %res) call void asm sideeffect "dummy $0", "h"(i32 %dummy) + call void asm sideeffect "use $0", "r"(i32 %a) ret void } -; FIXME: We should commute the LOCRMux to save one move. define void @f4(i32 %limit) { ; CHECK-LABEL: f4: ; CHECK-DAG: stepa [[REG1:%r[0-5]]] ; CHECK-DAG: stepb [[REG2:%r[0-5]]] -; CHECK-DAG: clijhe %r2, 42, -; CHECK: risbhg [[REG2]], [[REG1]], 0, 159, 32 +; CHECK-DAG: clijl %r2, 42, [[LABEL:.LBB[0-9_]+]] ; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32 +; CHECK: [[LABEL]] ; CHECK: stepc [[REG1]] ; CHECK: br %r14 %dummy = call i32 asm sideeffect "dummy $0", "=h"() @@ -75,6 +82,7 @@ define void @f4(i32 %limit) { %res = select i1 %cond, i32 %a, i32 %b call void asm sideeffect "stepc $0", "r"(i32 %res) call void asm sideeffect "dummy $0", "h"(i32 %dummy) + call void asm sideeffect "use $0", "h"(i32 %b) ret void } @@ -82,8 +90,9 @@ define void @f5(i32 %limit) { ; CHECK-LABEL: f5: ; CHECK-DAG: stepa [[REG2:%r[0-5]]] ; CHECK-DAG: stepb [[REG1:%r[0-5]]] -; CHECK-DAG: clijhe %r2, 42, +; CHECK-DAG: clijhe %r2, 42, [[LABEL:.LBB[0-9_]+]] ; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32 +; CHECK: [[LABEL]] ; CHECK: stepc [[REG1]] ; CHECK: br %r14 %dummy = call i32 asm sideeffect "dummy $0", "=h"() @@ -102,8 +111,8 @@ define void @f6(i32 %limit) { ; CHECK-DAG: stepa [[REG1:%r[0-5]]] ; CHECK-DAG: stepb [[REG2:%r[0-5]]] ; CHECK-DAG: clfi %r2, 41 -; CHECK: locfhrle [[REG2]], [[REG1]] -; CHECK: stepc [[REG2]] +; CHECK: locfhrh [[REG1]], [[REG2]] +; CHECK: stepc [[REG1]] ; CHECK: br %r14 entry: %a = call i32 asm sideeffect "stepa $0", "=h"() @@ -117,6 +126,7 @@ if.then: return: %res = phi i32 [ %a, %if.then ], [ %b, %entry ] call void asm sideeffect "stepc $0", "h"(i32 %res) + call void asm sideeffect "use $0", "h"(i32 %b) ret void } @@ -126,8 +136,8 @@ define void @f7(i32 %limit) { ; CHECK-DAG: stepa [[REG1:%r[0-5]]] ; CHECK-DAG: stepb [[REG2:%r[0-5]]] ; CHECK-DAG: clfi %r2, 41 -; CHECK: locfhrh [[REG2]], [[REG1]] -; CHECK: stepc [[REG2]] +; CHECK: locfhrle [[REG1]], [[REG2]] +; CHECK: stepc [[REG1]] ; CHECK: br %r14 entry: %a = call i32 asm sideeffect "stepa $0", "=h"() @@ -141,6 +151,7 @@ if.then: return: %res = phi i32 [ %b, %if.then ], [ %a, %entry ] call 
void asm sideeffect "stepc $0", "h"(i32 %res) + call void asm sideeffect "use $0", "h"(i32 %b) ret void } diff --git a/llvm/test/CodeGen/SystemZ/cond-move-06.ll b/llvm/test/CodeGen/SystemZ/cond-move-06.ll new file mode 100644 index 00000000000..3c133d91671 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/cond-move-06.ll @@ -0,0 +1,121 @@ +; Test SELR and SELGR. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 -verify-machineinstrs | FileCheck %s + +; Test SELR. +define i32 @f1(i32 %limit, i32 %a, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: clfi %r2, 42 +; CHECK: selrl %r2, %r3, %r4 +; CHECK: br %r14 + %cond = icmp ult i32 %limit, 42 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +; Test SELGR. +define i64 @f2(i64 %limit, i64 %a, i64 %b) { +; CHECK-LABEL: f2: +; CHECK: clgfi %r2, 42 +; CHECK: selgrl %r2, %r3, %r4 +; CHECK: br %r14 + %cond = icmp ult i64 %limit, 42 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Test SELR in a case that could use COMPARE AND BRANCH. We prefer using +; SELR if possible. +define i32 @f3(i32 %limit, i32 %a, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: chi %r2, 42 +; CHECK: selre %r2, %r3, %r4 +; CHECK: br %r14 + %cond = icmp eq i32 %limit, 42 + %res = select i1 %cond, i32 %a, i32 %b + ret i32 %res +} + +; ...and again for SELGR. +define i64 @f4(i64 %limit, i64 %a, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: cghi %r2, 42 +; CHECK: selgre %r2, %r3, %r4 +; CHECK: br %r14 + %cond = icmp eq i64 %limit, 42 + %res = select i1 %cond, i64 %a, i64 %b + ret i64 %res +} + +; Check that we also get SELR as a result of early if-conversion. +define i32 @f5(i32 %limit, i32 %a, i32 %b) { +; CHECK-LABEL: f5: +; CHECK: clfi %r2, 41 +; CHECK: selrh %r2, %r4, %r3 +; CHECK: br %r14 +entry: + %cond = icmp ult i32 %limit, 42 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i32 [ %a, %if.then ], [ %b, %entry ] + ret i32 %res +} + +; ... and likewise for SELGR. +define i64 @f6(i64 %limit, i64 %a, i64 %b) { +; CHECK-LABEL: f6: +; CHECK: clgfi %r2, 41 +; CHECK: selgrh %r2, %r4, %r3 +; CHECK: br %r14 +entry: + %cond = icmp ult i64 %limit, 42 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i64 [ %a, %if.then ], [ %b, %entry ] + ret i64 %res +} + +; Check that inverting the condition works as well. +define i32 @f7(i32 %limit, i32 %a, i32 %b) { +; CHECK-LABEL: f7: +; CHECK: clfi %r2, 41 +; CHECK: selrh %r2, %r3, %r4 +; CHECK: br %r14 +entry: + %cond = icmp ult i32 %limit, 42 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i32 [ %b, %if.then ], [ %a, %entry ] + ret i32 %res +} + +; ... and likewise for SELGR. +define i64 @f8(i64 %limit, i64 %a, i64 %b) { +; CHECK-LABEL: f8: +; CHECK: clgfi %r2, 41 +; CHECK: selgrh %r2, %r3, %r4 +; CHECK: br %r14 +entry: + %cond = icmp ult i64 %limit, 42 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i64 [ %b, %if.then ], [ %a, %entry ] + ret i64 %res +} + diff --git a/llvm/test/CodeGen/SystemZ/cond-move-07.ll b/llvm/test/CodeGen/SystemZ/cond-move-07.ll new file mode 100644 index 00000000000..87123b53379 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/cond-move-07.ll @@ -0,0 +1,76 @@ +; Test SELFHR. +; See comments in asm-18.ll about testing high-word operations. 
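+; SELFHR selects between two high-word registers into a third, possibly +; distinct, register. The "use" asm calls below keep both select inputs +; live afterwards, so the result cannot simply reuse one of the inputs.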
+; +; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=arch13 \ +; RUN: -no-integrated-as | FileCheck %s + +define void @f1(i32 %limit) { +; CHECK-LABEL: f1: +; CHECK-DAG: stepa [[REG1:%r[0-5]]] +; CHECK-DAG: stepb [[REG2:%r[0-5]]] +; CHECK-DAG: clfi %r2, 42 +; CHECK: selfhrl [[REG3:%r[0-5]]], [[REG1]], [[REG2]] +; CHECK: stepc [[REG3]] +; CHECK: br %r14 + %a = call i32 asm sideeffect "stepa $0", "=h"() + %b = call i32 asm sideeffect "stepb $0", "=h"() + %cond = icmp ult i32 %limit, 42 + %res = select i1 %cond, i32 %a, i32 %b + call void asm sideeffect "stepc $0", "h"(i32 %res) + call void asm sideeffect "use $0", "h"(i32 %a) + call void asm sideeffect "use $0", "h"(i32 %b) + ret void +} + +; Check that we also get SELFHR as a result of early if-conversion. +define void @f2(i32 %limit) { +; CHECK-LABEL: f2: +; CHECK-DAG: stepa [[REG1:%r[0-5]]] +; CHECK-DAG: stepb [[REG2:%r[0-5]]] +; CHECK-DAG: clfi %r2, 41 +; CHECK: selfhrh [[REG3:%r[0-5]]], [[REG2]], [[REG1]] +; CHECK: stepc [[REG3]] +; CHECK: br %r14 +entry: + %a = call i32 asm sideeffect "stepa $0", "=h"() + %b = call i32 asm sideeffect "stepb $0", "=h"() + %cond = icmp ult i32 %limit, 42 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i32 [ %a, %if.then ], [ %b, %entry ] + call void asm sideeffect "stepc $0", "h"(i32 %res) + call void asm sideeffect "use $0", "h"(i32 %a) + call void asm sideeffect "use $0", "h"(i32 %b) + ret void +} + +; Check that inverting the condition works as well. +define void @f3(i32 %limit) { +; CHECK-LABEL: f3: +; CHECK-DAG: stepa [[REG1:%r[0-5]]] +; CHECK-DAG: stepb [[REG2:%r[0-5]]] +; CHECK-DAG: clfi %r2, 41 +; CHECK: selfhrh [[REG3:%r[0-5]]], [[REG1]], [[REG2]] +; CHECK: stepc [[REG3]] +; CHECK: br %r14 +entry: + %a = call i32 asm sideeffect "stepa $0", "=h"() + %b = call i32 asm sideeffect "stepb $0", "=h"() + %cond = icmp ult i32 %limit, 42 + br i1 %cond, label %if.then, label %return + +if.then: + br label %return + +return: + %res = phi i32 [ %b, %if.then ], [ %a, %entry ] + call void asm sideeffect "stepc $0", "h"(i32 %res) + call void asm sideeffect "use $0", "h"(i32 %a) + call void asm sideeffect "use $0", "h"(i32 %b) + ret void +} + diff --git a/llvm/test/CodeGen/SystemZ/cond-move-08.mir b/llvm/test/CodeGen/SystemZ/cond-move-08.mir new file mode 100644 index 00000000000..aa5c4cd1697 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/cond-move-08.mir @@ -0,0 +1,179 @@ +# RUN: llc -mtriple=s390x-linux-gnu -mcpu=arch13 -start-before=greedy %s -o - \ +# RUN: | FileCheck %s +# +# Test that regalloc manages (via regalloc hints) to avoid a LOCRMux jump +# sequence expansion, and a SELR instruction is emitted. 
+ +--- | + ; ModuleID = 'tc.ll' + source_filename = "tc.ll" + target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" + + @globvar = external global i32 + + declare void @fun() #0 + + define void @fun1() #0 { + bb5: + br label %bb6 + + bb6: ; preds = %bb33, %bb5 + %tmp = phi i1 [ %tmp34, %bb33 ], [ undef, %bb5 ] + br label %bb7 + + bb7: ; preds = %bb7, %bb6 + %lsr.iv1 = phi [512 x i32]* [ %0, %bb7 ], [ undef, %bb6 ] + %tmp8 = phi i32 [ %tmp27, %bb7 ], [ -1000000, %bb6 ] + %tmp9 = phi i64 [ %tmp28, %bb7 ], [ 0, %bb6 ] + %lsr3 = trunc i64 %tmp9 to i32 + %lsr.iv12 = bitcast [512 x i32]* %lsr.iv1 to i32* + %tmp11 = load i32, i32* %lsr.iv12 + %tmp12 = icmp sgt i32 %tmp11, undef + %tmp13 = trunc i64 %tmp9 to i32 + %tmp14 = select i1 %tmp12, i32 %lsr3, i32 0 + %tmp15 = select i1 %tmp12, i32 %tmp13, i32 %tmp8 + %tmp16 = load i32, i32* undef + %tmp17 = select i1 false, i32 undef, i32 %tmp14 + %tmp18 = select i1 false, i32 undef, i32 %tmp15 + %tmp19 = select i1 false, i32 %tmp16, i32 undef + %tmp20 = select i1 undef, i32 undef, i32 %tmp17 + %tmp21 = select i1 undef, i32 undef, i32 %tmp18 + %tmp22 = select i1 undef, i32 undef, i32 %tmp19 + %tmp23 = or i64 %tmp9, 3 + %tmp24 = icmp sgt i32 undef, %tmp22 + %tmp25 = trunc i64 %tmp23 to i32 + %tmp26 = select i1 %tmp24, i32 %tmp25, i32 %tmp20 + %tmp27 = select i1 %tmp24, i32 %tmp25, i32 %tmp21 + %tmp28 = add nuw nsw i64 %tmp9, 4 + %tmp29 = icmp eq i64 undef, 0 + %scevgep = getelementptr [512 x i32], [512 x i32]* %lsr.iv1, i64 0, i64 4 + %0 = bitcast i32* %scevgep to [512 x i32]* + br i1 %tmp29, label %bb30, label %bb7 + + bb30: ; preds = %bb7 + %tmp32 = icmp sgt i32 %tmp27, -1000000 + br i1 %tmp32, label %bb33, label %bb35 + + bb33: ; preds = %bb30 + call void @fun() + store i32 %tmp26, i32* @globvar + %tmp34 = icmp ugt i32 undef, 1 + br label %bb6 + + bb35: ; preds = %bb30 + br i1 %tmp, label %bb37, label %bb38 + + bb37: ; preds = %bb35 + unreachable + + bb38: ; preds = %bb35 + unreachable + } + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #1 + + attributes #0 = { "target-cpu"="arch13" } + attributes #1 = { nounwind } + +... 
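+# +# The embedded IR module above is a pre-reduced reproducer; the CHECK lines +# below require that a SELR is emitted and that no RISBLG copy (the residue +# of an expanded LOCRMux jump sequence) survives.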
+ +# CHECK: selr +# CHECK-NOT: risblg + +--- +name: fun1 +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: grx32bit } + - { id: 1, class: addr64bit } + - { id: 2, class: grx32bit } + - { id: 3, class: addr64bit } + - { id: 4, class: gr32bit } + - { id: 5, class: grx32bit } + - { id: 6, class: gr64bit } + - { id: 7, class: gr64bit } + - { id: 8, class: grx32bit } + - { id: 9, class: grx32bit } + - { id: 10, class: gr64bit } + - { id: 11, class: grx32bit } + - { id: 12, class: gr64bit } + - { id: 13, class: grx32bit } + - { id: 14, class: gr32bit } + - { id: 15, class: gr32bit } + - { id: 16, class: grx32bit } + - { id: 17, class: grx32bit } + - { id: 18, class: gr32bit } + - { id: 19, class: addr64bit } + - { id: 20, class: grx32bit } + - { id: 21, class: gr32bit } + - { id: 22, class: gr64bit } + - { id: 23, class: grx32bit } + - { id: 24, class: grx32bit } + - { id: 25, class: grx32bit } + - { id: 26, class: addr64bit } + - { id: 27, class: grx32bit } + - { id: 28, class: addr64bit } +frameInfo: + hasCalls: true +body: | + bb.0.bb5: + %25:grx32bit = IMPLICIT_DEF + + bb.1.bb6: + %28:addr64bit = LGHI 0 + %27:grx32bit = IIFMux 4293967296 + %26:addr64bit = IMPLICIT_DEF + + bb.2.bb7: + successors: %bb.3(0x04000000), %bb.2(0x7c000000) + + %14:gr32bit = LMux %26, 0, $noreg :: (load 4 from %ir.lsr.iv12) + CR %14, undef %15:gr32bit, implicit-def $cc + %16:grx32bit = COPY %28.subreg_l32 + %16:grx32bit = LOCHIMux %16, 0, 14, 12, implicit $cc + %17:grx32bit = SELRMux %27, %28.subreg_l32, 14, 2, implicit killed $cc + %18:gr32bit = LMux undef %19:addr64bit, 0, $noreg :: (load 4 from `i32* undef`) + %20:grx32bit = COPY %28.subreg_l32 + %20:grx32bit = OILMux %20, 3, implicit-def dead $cc + CR undef %21:gr32bit, %18, implicit-def $cc + %4:gr32bit = SELRMux %16, %20, 14, 2, implicit $cc + %27:grx32bit = SELRMux %17, %20, 14, 2, implicit killed $cc + %28:addr64bit = nuw nsw LA %28, 4, $noreg + %26:addr64bit = LA %26, 16, $noreg + CGHI undef %22:gr64bit, 0, implicit-def $cc + BRC 14, 6, %bb.2, implicit killed $cc + J %bb.3 + + bb.3.bb30: + successors: %bb.4(0x7fffffff), %bb.5(0x00000001) + + CFIMux %27, -999999, implicit-def $cc + BRC 14, 4, %bb.5, implicit killed $cc + J %bb.4 + + bb.4.bb33: + ADJCALLSTACKDOWN 0, 0 + CallBRASL @fun, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc + ADJCALLSTACKUP 0, 0 + STRL %4, @globvar :: (store 4 into @globvar) + CLFIMux undef %23:grx32bit, 1, implicit-def $cc + %25:grx32bit = LHIMux 0 + %25:grx32bit = LOCHIMux %25, 1, 14, 2, implicit killed $cc + J %bb.1 + + bb.5.bb35: + successors: %bb.6, %bb.7 + + TMLMux %25, 1, implicit-def $cc + BRC 15, 8, %bb.7, implicit killed $cc + J %bb.6 + + bb.6.bb37: + successors: + + + bb.7.bb38: + +... 
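For reference, a hand-reduced IR-level sketch (illustrative only; function and value names are invented, not part of the patch) of the pattern the MIR test above protects: two selects share one condition while both inputs stay live, so without good allocation hints the second select would need an extra register copy:

; Sketch only. On arch13 both selects should lower to SELR,
; with no RISBLG copy from a LOCRMux expansion.
define void @selr_hint_sketch(i32 %limit, i32 %a, i32 %b, i32* %p, i32* %q) {
  %cond = icmp ult i32 %limit, 42
  %res0 = select i1 %cond, i32 %a, i32 %b    ; %a and %b are both still live
  %res1 = select i1 %cond, i32 %b, i32 %a    ; ...when the second select runs
  store i32 %res0, i32* %p
  store i32 %res1, i32* %q
  ret void
}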
diff --git a/llvm/test/CodeGen/SystemZ/ctpop-02.ll b/llvm/test/CodeGen/SystemZ/ctpop-02.ll new file mode 100644 index 00000000000..5b9d41f9af2 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/ctpop-02.ll @@ -0,0 +1,74 @@ +; Test population-count instruction on arch13 +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s + +declare i32 @llvm.ctpop.i32(i32 %a) +declare i64 @llvm.ctpop.i64(i64 %a) + +define i32 @f1(i32 %a) { +; CHECK-LABEL: f1: +; CHECK: llgfr %r0, %r2 +; CHECK: popcnt %r2, %r0, 8 +; CHECK: br %r14 + + %popcnt = call i32 @llvm.ctpop.i32(i32 %a) + ret i32 %popcnt +} + +define i32 @f2(i32 %a) { +; CHECK-LABEL: f2: +; CHECK: llghr %r0, %r2 +; CHECK: popcnt %r2, %r0, 8 +; CHECK: br %r14 + %and = and i32 %a, 65535 + %popcnt = call i32 @llvm.ctpop.i32(i32 %and) + ret i32 %popcnt +} + +define i32 @f3(i32 %a) { +; CHECK-LABEL: f3: +; CHECK: llgcr %r0, %r2 +; CHECK: popcnt %r2, %r0, 8 +; CHECK: br %r14 + %and = and i32 %a, 255 + %popcnt = call i32 @llvm.ctpop.i32(i32 %and) + ret i32 %popcnt +} + +define i64 @f4(i64 %a) { +; CHECK-LABEL: f4: +; CHECK: popcnt %r2, %r2, 8 +; CHECK: br %r14 + %popcnt = call i64 @llvm.ctpop.i64(i64 %a) + ret i64 %popcnt +} + +define i64 @f5(i64 %a) { +; CHECK-LABEL: f5: +; CHECK: llgfr %r0, %r2 +; CHECK: popcnt %r2, %r0, 8 + %and = and i64 %a, 4294967295 + %popcnt = call i64 @llvm.ctpop.i64(i64 %and) + ret i64 %popcnt +} + +define i64 @f6(i64 %a) { +; CHECK-LABEL: f6: +; CHECK: llghr %r0, %r2 +; CHECK: popcnt %r2, %r0, 8 +; CHECK: br %r14 + %and = and i64 %a, 65535 + %popcnt = call i64 @llvm.ctpop.i64(i64 %and) + ret i64 %popcnt +} + +define i64 @f7(i64 %a) { +; CHECK-LABEL: f7: +; CHECK: llgcr %r0, %r2 +; CHECK: popcnt %r2, %r0, 8 +; CHECK: br %r14 + %and = and i64 %a, 255 + %popcnt = call i64 @llvm.ctpop.i64(i64 %and) + ret i64 %popcnt +} + diff --git a/llvm/test/CodeGen/SystemZ/not-01.ll b/llvm/test/CodeGen/SystemZ/not-01.ll new file mode 100644 index 00000000000..3b9dbd1311f --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/not-01.ll @@ -0,0 +1,126 @@ +; Combined logical operations involving complement on arch13 +; +; RUN: llc -mcpu=arch13 < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; And-with-complement 32-bit. +define i32 @f1(i32 %dummy, i32 %a, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: ncrk %r2, %r3, %r4 +; CHECK: br %r14 + %neg = xor i32 %b, -1 + %ret = and i32 %neg, %a + ret i32 %ret +} + +; And-with-complement 64-bit. +define i64 @f2(i64 %dummy, i64 %a, i64 %b) { +; CHECK-LABEL: f2: +; CHECK: ncgrk %r2, %r3, %r4 +; CHECK: br %r14 + %neg = xor i64 %b, -1 + %ret = and i64 %neg, %a + ret i64 %ret +} + +; Or-with-complement 32-bit. +define i32 @f3(i32 %dummy, i32 %a, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: ocrk %r2, %r3, %r4 +; CHECK: br %r14 + %neg = xor i32 %b, -1 + %ret = or i32 %neg, %a + ret i32 %ret +} + +; Or-with-complement 64-bit. +define i64 @f4(i64 %dummy, i64 %a, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: ocgrk %r2, %r3, %r4 +; CHECK: br %r14 + %neg = xor i64 %b, -1 + %ret = or i64 %neg, %a + ret i64 %ret +} + +; NAND 32-bit. +define i32 @f5(i32 %dummy, i32 %a, i32 %b) { +; CHECK-LABEL: f5: +; CHECK: nnrk %r2, %r3, %r4 +; CHECK: br %r14 + %tmp = and i32 %a, %b + %ret = xor i32 %tmp, -1 + ret i32 %ret +} + +; NAND 64-bit. +define i64 @f6(i64 %dummy, i64 %a, i64 %b) { +; CHECK-LABEL: f6: +; CHECK: nngrk %r2, %r3, %r4 +; CHECK: br %r14 + %tmp = and i64 %a, %b + %ret = xor i64 %tmp, -1 + ret i64 %ret +} + +; NOR 32-bit. 
+define i32 @f7(i32 %dummy, i32 %a, i32 %b) { +; CHECK-LABEL: f7: +; CHECK: nork %r2, %r3, %r4 +; CHECK: br %r14 + %tmp = or i32 %a, %b + %ret = xor i32 %tmp, -1 + ret i32 %ret +} + +; NOR 64-bit. +define i64 @f8(i64 %dummy, i64 %a, i64 %b) { +; CHECK-LABEL: f8: +; CHECK: nogrk %r2, %r3, %r4 +; CHECK: br %r14 + %tmp = or i64 %a, %b + %ret = xor i64 %tmp, -1 + ret i64 %ret +} + +; NXOR 32-bit. +define i32 @f9(i32 %dummy, i32 %a, i32 %b) { +; CHECK-LABEL: f9: +; CHECK: nxrk %r2, %r3, %r4 +; CHECK: br %r14 + %tmp = xor i32 %a, %b + %ret = xor i32 %tmp, -1 + ret i32 %ret +} + +; NXOR 64-bit. +define i64 @f10(i64 %dummy, i64 %a, i64 %b) { +; CHECK-LABEL: f10: +; CHECK: nxgrk %r2, %r3, %r4 +; CHECK: br %r14 + %tmp = xor i64 %a, %b + %ret = xor i64 %tmp, -1 + ret i64 %ret +} + +; Or-with-complement 32-bit of a constant. +define i32 @f11(i32 %a) { +; CHECK-LABEL: f11: +; CHECK: lhi [[REG:%r[0-5]]], -256 +; CHECK: ocrk %r2, [[REG]], %r2 +; CHECK: br %r14 + %neg = xor i32 %a, -1 + %ret = or i32 %neg, -256 + ret i32 %ret +} + +; Or-with-complement 64-bit of a constant. +define i64 @f12(i64 %a) { +; CHECK-LABEL: f12: +; CHECK: lghi [[REG:%r[0-5]]], -256 +; CHECK: ocgrk %r2, [[REG]], %r2 +; CHECK: br %r14 + %neg = xor i64 %a, -1 + %ret = or i64 %neg, -256 + ret i64 %ret +} + diff --git a/llvm/test/CodeGen/SystemZ/vec-bswap-01.ll b/llvm/test/CodeGen/SystemZ/vec-bswap-01.ll new file mode 100644 index 00000000000..8132108953a --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-bswap-01.ll @@ -0,0 +1,97 @@ +; Test loads of byte-swapped vector elements. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s + +declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>) +declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) +declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) + +; Test v8i16 loads. +define <8 x i16> @f1(<8 x i16> *%ptr) { +; CHECK-LABEL: f1: +; CHECK: vlbrh %v24, 0(%r2) +; CHECK: br %r14 + %load = load <8 x i16>, <8 x i16> *%ptr + %ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %load) + ret <8 x i16> %ret +} + +; Test v4i32 loads. +define <4 x i32> @f2(<4 x i32> *%ptr) { +; CHECK-LABEL: f2: +; CHECK: vlbrf %v24, 0(%r2) +; CHECK: br %r14 + %load = load <4 x i32>, <4 x i32> *%ptr + %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load) + ret <4 x i32> %ret +} + +; Test v2i64 loads. +define <2 x i64> @f3(<2 x i64> *%ptr) { +; CHECK-LABEL: f3: +; CHECK: vlbrg %v24, 0(%r2) +; CHECK: br %r14 + %load = load <2 x i64>, <2 x i64> *%ptr + %ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %load) + ret <2 x i64> %ret +} + +; Test the highest aligned in-range offset. +define <4 x i32> @f4(<4 x i32> *%base) { +; CHECK-LABEL: f4: +; CHECK: vlbrf %v24, 4080(%r2) +; CHECK: br %r14 + %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255 + %load = load <4 x i32>, <4 x i32> *%ptr + %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load) + ret <4 x i32> %ret +} + +; Test the highest unaligned in-range offset. 
+define <4 x i32> @f5(i8 *%base) { +; CHECK-LABEL: f5: +; CHECK: vlbrf %v24, 4095(%r2) +; CHECK: br %r14 + %addr = getelementptr i8, i8 *%base, i64 4095 + %ptr = bitcast i8 *%addr to <4 x i32> * + %load = load <4 x i32>, <4 x i32> *%ptr + %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load) + ret <4 x i32> %ret +} + +; Test the next offset up, which requires separate address logic. +define <4 x i32> @f6(<4 x i32> *%base) { +; CHECK-LABEL: f6: +; CHECK: aghi %r2, 4096 +; CHECK: vlbrf %v24, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256 + %load = load <4 x i32>, <4 x i32> *%ptr + %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load) + ret <4 x i32> %ret +} + +; Test negative offsets, which also require separate address logic. +define <4 x i32> @f7(<4 x i32> *%base) { +; CHECK-LABEL: f7: +; CHECK: aghi %r2, -16 +; CHECK: vlbrf %v24, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1 + %load = load <4 x i32>, <4 x i32> *%ptr + %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load) + ret <4 x i32> %ret +} + +; Check that indexes are allowed. +define <4 x i32> @f8(i8 *%base, i64 %index) { +; CHECK-LABEL: f8: +; CHECK: vlbrf %v24, 0(%r3,%r2) +; CHECK: br %r14 + %addr = getelementptr i8, i8 *%base, i64 %index + %ptr = bitcast i8 *%addr to <4 x i32> * + %load = load <4 x i32>, <4 x i32> *%ptr + %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load) + ret <4 x i32> %ret +} + diff --git a/llvm/test/CodeGen/SystemZ/vec-bswap-02.ll b/llvm/test/CodeGen/SystemZ/vec-bswap-02.ll new file mode 100644 index 00000000000..ff7facc0910 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-bswap-02.ll @@ -0,0 +1,97 @@ +; Test stores of byte-swapped vector elements. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s + +declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>) +declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) +declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) + +; Test v8i16 stores. +define void @f1(<8 x i16> %val, <8 x i16> *%ptr) { +; CHECK-LABEL: f1: +; CHECK: vstbrh %v24, 0(%r2) +; CHECK: br %r14 + %swap = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val) + store <8 x i16> %swap, <8 x i16> *%ptr + ret void +} + +; Test v4i32 stores. +define void @f2(<4 x i32> %val, <4 x i32> *%ptr) { +; CHECK-LABEL: f2: +; CHECK: vstbrf %v24, 0(%r2) +; CHECK: br %r14 + %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) + store <4 x i32> %swap, <4 x i32> *%ptr + ret void +} + +; Test v2i64 stores. +define void @f3(<2 x i64> %val, <2 x i64> *%ptr) { +; CHECK-LABEL: f3: +; CHECK: vstbrg %v24, 0(%r2) +; CHECK: br %r14 + %swap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val) + store <2 x i64> %swap, <2 x i64> *%ptr + ret void +} + +; Test the highest aligned in-range offset. +define void @f4(<4 x i32> %val, <4 x i32> *%base) { +; CHECK-LABEL: f4: +; CHECK: vstbrf %v24, 4080(%r2) +; CHECK: br %r14 + %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255 + %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) + store <4 x i32> %swap, <4 x i32> *%ptr + ret void +} + +; Test the highest unaligned in-range offset. 
+define void @f5(<4 x i32> %val, i8 *%base) { +; CHECK-LABEL: f5: +; CHECK: vstbrf %v24, 4095(%r2) +; CHECK: br %r14 + %addr = getelementptr i8, i8 *%base, i64 4095 + %ptr = bitcast i8 *%addr to <4 x i32> * + %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) + store <4 x i32> %swap, <4 x i32> *%ptr, align 1 + ret void +} + +; Test the next offset up, which requires separate address logic. +define void @f6(<4 x i32> %val, <4 x i32> *%base) { +; CHECK-LABEL: f6: +; CHECK: aghi %r2, 4096 +; CHECK: vstbrf %v24, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256 + %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) + store <4 x i32> %swap, <4 x i32> *%ptr + ret void +} + +; Test negative offsets, which also require separate address logic. +define void @f7(<4 x i32> %val, <4 x i32> *%base) { +; CHECK-LABEL: f7: +; CHECK: aghi %r2, -16 +; CHECK: vstbrf %v24, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1 + %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) + store <4 x i32> %swap, <4 x i32> *%ptr + ret void +} + +; Check that indexes are allowed. +define void @f8(<4 x i32> %val, i8 *%base, i64 %index) { +; CHECK-LABEL: f8: +; CHECK: vstbrf %v24, 0(%r3,%r2) +; CHECK: br %r14 + %addr = getelementptr i8, i8 *%base, i64 %index + %ptr = bitcast i8 *%addr to <4 x i32> * + %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) + store <4 x i32> %swap, <4 x i32> *%ptr, align 1 + ret void +} + diff --git a/llvm/test/CodeGen/SystemZ/vec-bswap-03.ll b/llvm/test/CodeGen/SystemZ/vec-bswap-03.ll new file mode 100644 index 00000000000..9102c739b12 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-bswap-03.ll @@ -0,0 +1,220 @@ +; Test vector insertion of byte-swapped memory values. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s + +declare i16 @llvm.bswap.i16(i16) +declare i32 @llvm.bswap.i32(i32) +declare i64 @llvm.bswap.i64(i64) +declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>) +declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) +declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) + +; Test v8i16 insertion into the first element. +define <8 x i16> @f1(<8 x i16> %val, i16 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: vlebrh %v24, 0(%r2), 0 +; CHECK: br %r14 + %element = load i16, i16 *%ptr + %swap = call i16 @llvm.bswap.i16(i16 %element) + %ret = insertelement <8 x i16> %val, i16 %swap, i32 0 + ret <8 x i16> %ret +} + +; Test v8i16 insertion into the last element. +define <8 x i16> @f2(<8 x i16> %val, i16 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: vlebrh %v24, 0(%r2), 7 +; CHECK: br %r14 + %element = load i16, i16 *%ptr + %swap = call i16 @llvm.bswap.i16(i16 %element) + %ret = insertelement <8 x i16> %val, i16 %swap, i32 7 + ret <8 x i16> %ret +} + +; Test v8i16 insertion with the highest in-range offset. +define <8 x i16> @f3(<8 x i16> %val, i16 *%base) { +; CHECK-LABEL: f3: +; CHECK: vlebrh %v24, 4094(%r2), 5 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%base, i32 2047 + %element = load i16, i16 *%ptr + %swap = call i16 @llvm.bswap.i16(i16 %element) + %ret = insertelement <8 x i16> %val, i16 %swap, i32 5 + ret <8 x i16> %ret +} + +; Test v8i16 insertion with the first out-of-range offset. 
+define <8 x i16> @f4(<8 x i16> %val, i16 *%base) { +; CHECK-LABEL: f4: +; CHECK: aghi %r2, 4096 +; CHECK: vlebrh %v24, 0(%r2), 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%base, i32 2048 + %element = load i16, i16 *%ptr + %swap = call i16 @llvm.bswap.i16(i16 %element) + %ret = insertelement <8 x i16> %val, i16 %swap, i32 1 + ret <8 x i16> %ret +} + +; Test v8i16 insertion into a variable element. +define <8 x i16> @f5(<8 x i16> %val, i16 *%ptr, i32 %index) { +; CHECK-LABEL: f5: +; CHECK-NOT: vlebrh +; CHECK: br %r14 + %element = load i16, i16 *%ptr + %swap = call i16 @llvm.bswap.i16(i16 %element) + %ret = insertelement <8 x i16> %val, i16 %swap, i32 %index + ret <8 x i16> %ret +} + +; Test v8i16 insertion using a pair of vector bswaps. +define <8 x i16> @f6(<8 x i16> %val, i16 *%ptr) { +; CHECK-LABEL: f6: +; CHECK: vlebrh %v24, 0(%r2), 0 +; CHECK: br %r14 + %element = load i16, i16 *%ptr + %swapval = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val) + %insert = insertelement <8 x i16> %swapval, i16 %element, i32 0 + %ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %insert) + ret <8 x i16> %ret +} + +; Test v4i32 insertion into the first element. +define <4 x i32> @f7(<4 x i32> %val, i32 *%ptr) { +; CHECK-LABEL: f7: +; CHECK: vlebrf %v24, 0(%r2), 0 +; CHECK: br %r14 + %element = load i32, i32 *%ptr + %swap = call i32 @llvm.bswap.i32(i32 %element) + %ret = insertelement <4 x i32> %val, i32 %swap, i32 0 + ret <4 x i32> %ret +} + +; Test v4i32 insertion into the last element. +define <4 x i32> @f8(<4 x i32> %val, i32 *%ptr) { +; CHECK-LABEL: f8: +; CHECK: vlebrf %v24, 0(%r2), 3 +; CHECK: br %r14 + %element = load i32, i32 *%ptr + %swap = call i32 @llvm.bswap.i32(i32 %element) + %ret = insertelement <4 x i32> %val, i32 %swap, i32 3 + ret <4 x i32> %ret +} + +; Test v4i32 insertion with the highest in-range offset. +define <4 x i32> @f9(<4 x i32> %val, i32 *%base) { +; CHECK-LABEL: f9: +; CHECK: vlebrf %v24, 4092(%r2), 2 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i32 1023 + %element = load i32, i32 *%ptr + %swap = call i32 @llvm.bswap.i32(i32 %element) + %ret = insertelement <4 x i32> %val, i32 %swap, i32 2 + ret <4 x i32> %ret +} + +; Test v4i32 insertion with the first out-of-range offset. +define <4 x i32> @f10(<4 x i32> %val, i32 *%base) { +; CHECK-LABEL: f10: +; CHECK: aghi %r2, 4096 +; CHECK: vlebrf %v24, 0(%r2), 1 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i32 1024 + %element = load i32, i32 *%ptr + %swap = call i32 @llvm.bswap.i32(i32 %element) + %ret = insertelement <4 x i32> %val, i32 %swap, i32 1 + ret <4 x i32> %ret +} + +; Test v4i32 insertion into a variable element. +define <4 x i32> @f11(<4 x i32> %val, i32 *%ptr, i32 %index) { +; CHECK-LABEL: f11: +; CHECK-NOT: vlebrf +; CHECK: br %r14 + %element = load i32, i32 *%ptr + %swap = call i32 @llvm.bswap.i32(i32 %element) + %ret = insertelement <4 x i32> %val, i32 %swap, i32 %index + ret <4 x i32> %ret +} + +; Test v4i32 insertion using a pair of vector bswaps. +define <4 x i32> @f12(<4 x i32> %val, i32 *%ptr) { +; CHECK-LABEL: f12: +; CHECK: vlebrf %v24, 0(%r2), 0 +; CHECK: br %r14 + %element = load i32, i32 *%ptr + %swapval = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) + %insert = insertelement <4 x i32> %swapval, i32 %element, i32 0 + %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert) + ret <4 x i32> %ret +} + +; Test v2i64 insertion into the first element. 
+define <2 x i64> @f13(<2 x i64> %val, i64 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: vlebrg %v24, 0(%r2), 0 +; CHECK: br %r14 + %element = load i64, i64 *%ptr + %swap = call i64 @llvm.bswap.i64(i64 %element) + %ret = insertelement <2 x i64> %val, i64 %swap, i32 0 + ret <2 x i64> %ret +} + +; Test v2i64 insertion into the last element. +define <2 x i64> @f14(<2 x i64> %val, i64 *%ptr) { +; CHECK-LABEL: f14: +; CHECK: vlebrg %v24, 0(%r2), 1 +; CHECK: br %r14 + %element = load i64, i64 *%ptr + %swap = call i64 @llvm.bswap.i64(i64 %element) + %ret = insertelement <2 x i64> %val, i64 %swap, i32 1 + ret <2 x i64> %ret +} + +; Test v2i64 insertion with the highest in-range offset. +define <2 x i64> @f15(<2 x i64> %val, i64 *%base) { +; CHECK-LABEL: f15: +; CHECK: vlebrg %v24, 4088(%r2), 1 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i32 511 + %element = load i64, i64 *%ptr + %swap = call i64 @llvm.bswap.i64(i64 %element) + %ret = insertelement <2 x i64> %val, i64 %swap, i32 1 + ret <2 x i64> %ret +} + +; Test v2i64 insertion with the first out-of-range offset. +define <2 x i64> @f16(<2 x i64> %val, i64 *%base) { +; CHECK-LABEL: f16: +; CHECK: aghi %r2, 4096 +; CHECK: vlebrg %v24, 0(%r2), 0 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i32 512 + %element = load i64, i64 *%ptr + %swap = call i64 @llvm.bswap.i64(i64 %element) + %ret = insertelement <2 x i64> %val, i64 %swap, i32 0 + ret <2 x i64> %ret +} + +; Test v2i64 insertion into a variable element. +define <2 x i64> @f17(<2 x i64> %val, i64 *%ptr, i32 %index) { +; CHECK-LABEL: f17: +; CHECK-NOT: vlebrg +; CHECK: br %r14 + %element = load i64, i64 *%ptr + %swap = call i64 @llvm.bswap.i64(i64 %element) + %ret = insertelement <2 x i64> %val, i64 %swap, i32 %index + ret <2 x i64> %ret +} + +; Test v2i64 insertion using a pair of vector bswaps. +define <2 x i64> @f18(<2 x i64> %val, i64 *%ptr) { +; CHECK-LABEL: f18: +; CHECK: vlebrg %v24, 0(%r2), 0 +; CHECK: br %r14 + %element = load i64, i64 *%ptr + %swapval = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val) + %insert = insertelement <2 x i64> %swapval, i64 %element, i32 0 + %ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %insert) + ret <2 x i64> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/vec-bswap-04.ll b/llvm/test/CodeGen/SystemZ/vec-bswap-04.ll new file mode 100644 index 00000000000..43787a48b2e --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-bswap-04.ll @@ -0,0 +1,254 @@ +; Test vector extraction of byte-swapped value to memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s + +declare i16 @llvm.bswap.i16(i16) +declare i32 @llvm.bswap.i32(i32) +declare i64 @llvm.bswap.i64(i64) +declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>) +declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) +declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) + +; Test v8i16 extraction from the first element. +define void @f1(<8 x i16> %val, i16 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: vstebrh %v24, 0(%r2), 0 +; CHECK: br %r14 + %element = extractelement <8 x i16> %val, i32 0 + %swap = call i16 @llvm.bswap.i16(i16 %element) + store i16 %swap, i16 *%ptr + ret void +} + +; Test v8i16 extraction from the last element. +define void @f2(<8 x i16> %val, i16 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: vstebrh %v24, 0(%r2), 7 +; CHECK: br %r14 + %element = extractelement <8 x i16> %val, i32 7 + %swap = call i16 @llvm.bswap.i16(i16 %element) + store i16 %swap, i16 *%ptr + ret void +} + +; Test v8i16 extraction of an invalid element. This must compile, +; but we don't care what it does. 
+define void @f3(<8 x i16> %val, i16 *%ptr) { +; CHECK-LABEL: f3: +; CHECK-NOT: vstebrh %v24, 0(%r2), 8 +; CHECK: br %r14 + %element = extractelement <8 x i16> %val, i32 8 + %swap = call i16 @llvm.bswap.i16(i16 %element) + store i16 %swap, i16 *%ptr + ret void +} + +; Test v8i16 extraction with the highest in-range offset. +define void @f4(<8 x i16> %val, i16 *%base) { +; CHECK-LABEL: f4: +; CHECK: vstebrh %v24, 4094(%r2), 5 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%base, i32 2047 + %element = extractelement <8 x i16> %val, i32 5 + %swap = call i16 @llvm.bswap.i16(i16 %element) + store i16 %swap, i16 *%ptr + ret void +} + +; Test v8i16 extraction with the first out-of-range offset. +define void @f5(<8 x i16> %val, i16 *%base) { +; CHECK-LABEL: f5: +; CHECK: aghi %r2, 4096 +; CHECK: vstebrh %v24, 0(%r2), 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%base, i32 2048 + %element = extractelement <8 x i16> %val, i32 1 + %swap = call i16 @llvm.bswap.i16(i16 %element) + store i16 %swap, i16 *%ptr + ret void +} + +; Test v8i16 extraction from a variable element. +define void @f6(<8 x i16> %val, i16 *%ptr, i32 %index) { +; CHECK-LABEL: f6: +; CHECK-NOT: vstebrh +; CHECK: br %r14 + %element = extractelement <8 x i16> %val, i32 %index + %swap = call i16 @llvm.bswap.i16(i16 %element) + store i16 %swap, i16 *%ptr + ret void +} + +; Test v8i16 extraction using a vector bswap. +define void @f7(<8 x i16> %val, i16 *%ptr) { +; CHECK-LABEL: f7: +; CHECK: vstebrh %v24, 0(%r2), 0 +; CHECK: br %r14 + %swap = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val) + %element = extractelement <8 x i16> %swap, i32 0 + store i16 %element, i16 *%ptr + ret void +} + +; Test v4i32 extraction from the first element. +define void @f8(<4 x i32> %val, i32 *%ptr) { +; CHECK-LABEL: f8: +; CHECK: vstebrf %v24, 0(%r2), 0 +; CHECK: br %r14 + %element = extractelement <4 x i32> %val, i32 0 + %swap = call i32 @llvm.bswap.i32(i32 %element) + store i32 %swap, i32 *%ptr + ret void +} + +; Test v4i32 extraction from the last element. +define void @f9(<4 x i32> %val, i32 *%ptr) { +; CHECK-LABEL: f9: +; CHECK: vstebrf %v24, 0(%r2), 3 +; CHECK: br %r14 + %element = extractelement <4 x i32> %val, i32 3 + %swap = call i32 @llvm.bswap.i32(i32 %element) + store i32 %swap, i32 *%ptr + ret void +} + +; Test v4i32 extraction of an invalid element. This must compile, +; but we don't care what it does. +define void @f10(<4 x i32> %val, i32 *%ptr) { +; CHECK-LABEL: f10: +; CHECK-NOT: vstebrf %v24, 0(%r2), 4 +; CHECK: br %r14 + %element = extractelement <4 x i32> %val, i32 4 + %swap = call i32 @llvm.bswap.i32(i32 %element) + store i32 %swap, i32 *%ptr + ret void +} + +; Test v4i32 extraction with the highest in-range offset. +define void @f11(<4 x i32> %val, i32 *%base) { +; CHECK-LABEL: f11: +; CHECK: vstebrf %v24, 4092(%r2), 2 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i32 1023 + %element = extractelement <4 x i32> %val, i32 2 + %swap = call i32 @llvm.bswap.i32(i32 %element) + store i32 %swap, i32 *%ptr + ret void +} + +; Test v4i32 extraction with the first out-of-range offset. +define void @f12(<4 x i32> %val, i32 *%base) { +; CHECK-LABEL: f12: +; CHECK: aghi %r2, 4096 +; CHECK: vstebrf %v24, 0(%r2), 1 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i32 1024 + %element = extractelement <4 x i32> %val, i32 1 + %swap = call i32 @llvm.bswap.i32(i32 %element) + store i32 %swap, i32 *%ptr + ret void +} + +; Test v4i32 extraction from a variable element. 
+define void @f13(<4 x i32> %val, i32 *%ptr, i32 %index) { +; CHECK-LABEL: f13: +; CHECK-NOT: vstebrf +; CHECK: br %r14 + %element = extractelement <4 x i32> %val, i32 %index + %swap = call i32 @llvm.bswap.i32(i32 %element) + store i32 %swap, i32 *%ptr + ret void +} + +; Test v4i32 extraction using a vector bswap. +define void @f14(<4 x i32> %val, i32 *%ptr) { +; CHECK-LABEL: f14: +; CHECK: vstebrf %v24, 0(%r2), 0 +; CHECK: br %r14 + %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) + %element = extractelement <4 x i32> %swap, i32 0 + store i32 %element, i32 *%ptr + ret void +} + +; Test v2i64 extraction from the first element. +define void @f15(<2 x i64> %val, i64 *%ptr) { +; CHECK-LABEL: f15: +; CHECK: vstebrg %v24, 0(%r2), 0 +; CHECK: br %r14 + %element = extractelement <2 x i64> %val, i32 0 + %swap = call i64 @llvm.bswap.i64(i64 %element) + store i64 %swap, i64 *%ptr + ret void +} + +; Test v2i64 extraction from the last element. +define void @f16(<2 x i64> %val, i64 *%ptr) { +; CHECK-LABEL: f16: +; CHECK: vstebrg %v24, 0(%r2), 1 +; CHECK: br %r14 + %element = extractelement <2 x i64> %val, i32 1 + %swap = call i64 @llvm.bswap.i64(i64 %element) + store i64 %swap, i64 *%ptr + ret void +} + +; Test v2i64 extraction of an invalid element. This must compile, +; but we don't care what it does. +define void @f17(<2 x i64> %val, i64 *%ptr) { +; CHECK-LABEL: f17: +; CHECK-NOT: vstebrg %v24, 0(%r2), 2 +; CHECK: br %r14 + %element = extractelement <2 x i64> %val, i32 2 + %swap = call i64 @llvm.bswap.i64(i64 %element) + store i64 %swap, i64 *%ptr + ret void +} + +; Test v2i64 extraction with the highest in-range offset. +define void @f18(<2 x i64> %val, i64 *%base) { +; CHECK-LABEL: f18: +; CHECK: vstebrg %v24, 4088(%r2), 1 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i32 511 + %element = extractelement <2 x i64> %val, i32 1 + %swap = call i64 @llvm.bswap.i64(i64 %element) + store i64 %swap, i64 *%ptr + ret void +} + +; Test v2i64 extraction with the first out-of-range offset. +define void @f19(<2 x i64> %val, i64 *%base) { +; CHECK-LABEL: f19: +; CHECK: aghi %r2, 4096 +; CHECK: vstebrg %v24, 0(%r2), 0 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i32 512 + %element = extractelement <2 x i64> %val, i32 0 + %swap = call i64 @llvm.bswap.i64(i64 %element) + store i64 %swap, i64 *%ptr + ret void +} + +; Test v2i64 extraction from a variable element. +define void @f20(<2 x i64> %val, i64 *%ptr, i32 %index) { +; CHECK-LABEL: f20: +; CHECK-NOT: vstebrg +; CHECK: br %r14 + %element = extractelement <2 x i64> %val, i32 %index + %swap = call i64 @llvm.bswap.i64(i64 %element) + store i64 %swap, i64 *%ptr + ret void +} + +; Test v2i64 extraction using a vector bswap. +define void @f21(<2 x i64> %val, i64 *%ptr) { +; CHECK-LABEL: f21: +; CHECK: vstebrg %v24, 0(%r2), 0 +; CHECK: br %r14 + %swap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val) + %element = extractelement <2 x i64> %swap, i32 0 + store i64 %element, i64 *%ptr + ret void +} + diff --git a/llvm/test/CodeGen/SystemZ/vec-bswap-05.ll b/llvm/test/CodeGen/SystemZ/vec-bswap-05.ll new file mode 100644 index 00000000000..038a3f4f4f5 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-bswap-05.ll @@ -0,0 +1,136 @@ +; Test vector insertions of byte-swapped memory values into 0. 
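+; The insertion indices used below match the fixed element each instruction +; writes: VLLEBRZH fills element 3 of <8 x i16>, VLLEBRZF element 1 and +; VLLEBRZE element 0 of <4 x i32>, and VLLEBRZG element 0 of <2 x i64>; +; all other elements are zeroed.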
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s + +declare i16 @llvm.bswap.i16(i16) +declare i32 @llvm.bswap.i32(i32) +declare i64 @llvm.bswap.i64(i64) +declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>) +declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) +declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) + +; Test VLLEBRZH. +define <8 x i16> @f1(i16 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: vllebrzh %v24, 0(%r2) +; CHECK: br %r14 + %val = load i16, i16 *%ptr + %swap = call i16 @llvm.bswap.i16(i16 %val) + %ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3 + ret <8 x i16> %ret +} + +; Test VLLEBRZH using a vector bswap. +define <8 x i16> @f2(i16 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: vllebrzh %v24, 0(%r2) +; CHECK: br %r14 + %val = load i16, i16 *%ptr + %insert = insertelement <8 x i16> zeroinitializer, i16 %val, i32 3 + %ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %insert) + ret <8 x i16> %ret +} + +; Test VLLEBRZF. +define <4 x i32> @f3(i32 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: vllebrzf %v24, 0(%r2) +; CHECK: br %r14 + %val = load i32, i32 *%ptr + %swap = call i32 @llvm.bswap.i32(i32 %val) + %ret = insertelement <4 x i32> zeroinitializer, i32 %swap, i32 1 + ret <4 x i32> %ret +} + +; Test VLLEBRZF using a vector bswap. +define <4 x i32> @f4(i32 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: vllebrzf %v24, 0(%r2) +; CHECK: br %r14 + %val = load i32, i32 *%ptr + %insert = insertelement <4 x i32> zeroinitializer, i32 %val, i32 1 + %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert) + ret <4 x i32> %ret +} + +; Test VLLEBRZG. +define <2 x i64> @f5(i64 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: vllebrzg %v24, 0(%r2) +; CHECK: br %r14 + %val = load i64, i64 *%ptr + %swap = call i64 @llvm.bswap.i64(i64 %val) + %ret = insertelement <2 x i64> zeroinitializer, i64 %swap, i32 0 + ret <2 x i64> %ret +} + +; Test VLLEBRZG using a vector bswap. +define <2 x i64> @f6(i64 *%ptr) { +; CHECK-LABEL: f6: +; CHECK: vllebrzg %v24, 0(%r2) +; CHECK: br %r14 + %val = load i64, i64 *%ptr + %insert = insertelement <2 x i64> zeroinitializer, i64 %val, i32 0 + %ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %insert) + ret <2 x i64> %ret +} + +; Test VLLEBRZE. +define <4 x i32> @f7(i32 *%ptr) { +; CHECK-LABEL: f7: +; CHECK: vllebrze %v24, 0(%r2) +; CHECK: br %r14 + %val = load i32, i32 *%ptr + %swap = call i32 @llvm.bswap.i32(i32 %val) + %ret = insertelement <4 x i32> zeroinitializer, i32 %swap, i32 0 + ret <4 x i32> %ret +} + +; Test VLLEBRZE using a vector bswap. +define <4 x i32> @f8(i32 *%ptr) { +; CHECK-LABEL: f8: +; CHECK: vllebrze %v24, 0(%r2) +; CHECK: br %r14 + %val = load i32, i32 *%ptr + %insert = insertelement <4 x i32> zeroinitializer, i32 %val, i32 0 + %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert) + ret <4 x i32> %ret +} + +; Test VLLEBRZH with the highest in-range offset. +define <8 x i16> @f9(i16 *%base) { +; CHECK-LABEL: f9: +; CHECK: vllebrzh %v24, 4094(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%base, i64 2047 + %val = load i16, i16 *%ptr + %swap = call i16 @llvm.bswap.i16(i16 %val) + %ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3 + ret <8 x i16> %ret +} + +; Test VLLEBRZH with the next highest offset. 
+define <8 x i16> @f10(i16 *%base) { +; CHECK-LABEL: f10: +; CHECK-NOT: vllebrzh %v24, 4096(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%base, i64 2048 + %val = load i16, i16 *%ptr + %swap = call i16 @llvm.bswap.i16(i16 %val) + %ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3 + ret <8 x i16> %ret +} + +; Test that VLLEBRZH allows an index. +define <8 x i16> @f11(i16 *%base, i64 %index) { +; CHECK-LABEL: f11: +; CHECK: sllg [[REG:%r[1-5]]], %r3, 1 +; CHECK: vllebrzh %v24, 0([[REG]],%r2) +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%base, i64 %index + %val = load i16, i16 *%ptr + %swap = call i16 @llvm.bswap.i16(i16 %val) + %ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3 + ret <8 x i16> %ret +} + diff --git a/llvm/test/CodeGen/SystemZ/vec-bswap-06.ll b/llvm/test/CodeGen/SystemZ/vec-bswap-06.ll new file mode 100644 index 00000000000..0c78633ee7d --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-bswap-06.ll @@ -0,0 +1,77 @@ +; Test insertions of byte-swapped memory values into a nonzero index of an undef. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s + +declare i16 @llvm.bswap.i16(i16) +declare i32 @llvm.bswap.i32(i32) +declare i64 @llvm.bswap.i64(i64) +declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>) +declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) +declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) + +; Test v8i16 insertion into an undef, with an arbitrary index. +define <8 x i16> @f1(i16 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: vlbrreph %v24, 0(%r2) +; CHECK-NEXT: br %r14 + %val = load i16, i16 *%ptr + %swap = call i16 @llvm.bswap.i16(i16 %val) + %ret = insertelement <8 x i16> undef, i16 %swap, i32 5 + ret <8 x i16> %ret +} + +; Test v8i16 insertion into an undef, using a vector bswap. +define <8 x i16> @f2(i16 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: vlbrreph %v24, 0(%r2) +; CHECK-NEXT: br %r14 + %val = load i16, i16 *%ptr + %insert = insertelement <8 x i16> undef, i16 %val, i32 5 + %ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %insert) + ret <8 x i16> %ret +} + +; Test v4i32 insertion into an undef, with an arbitrary index. +define <4 x i32> @f3(i32 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: vlbrrepf %v24, 0(%r2) +; CHECK-NEXT: br %r14 + %val = load i32, i32 *%ptr + %swap = call i32 @llvm.bswap.i32(i32 %val) + %ret = insertelement <4 x i32> undef, i32 %swap, i32 2 + ret <4 x i32> %ret +} + +; Test v4i32 insertion into an undef, using a vector bswap. +define <4 x i32> @f4(i32 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: vlbrrepf %v24, 0(%r2) +; CHECK-NEXT: br %r14 + %val = load i32, i32 *%ptr + %insert = insertelement <4 x i32> undef, i32 %val, i32 2 + %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert) + ret <4 x i32> %ret +} + +; Test v2i64 insertion into an undef, with an arbitrary index. +define <2 x i64> @f5(i64 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: vlbrrepg %v24, 0(%r2) +; CHECK-NEXT: br %r14 + %val = load i64, i64 *%ptr + %swap = call i64 @llvm.bswap.i64(i64 %val) + %ret = insertelement <2 x i64> undef, i64 %swap, i32 1 + ret <2 x i64> %ret +} + +; Test v2i64 insertion into an undef, using a vector bswap. 
+define <2 x i64> @f6(i64 *%ptr) { +; CHECK-LABEL: f6: +; CHECK: vlbrrepg %v24, 0(%r2) +; CHECK-NEXT: br %r14 + %val = load i64, i64 *%ptr + %insert = insertelement <2 x i64> undef, i64 %val, i32 1 + %ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %insert) + ret <2 x i64> %ret +} + diff --git a/llvm/test/CodeGen/SystemZ/vec-bswap-07.ll b/llvm/test/CodeGen/SystemZ/vec-bswap-07.ll new file mode 100644 index 00000000000..0190184c1e2 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-bswap-07.ll @@ -0,0 +1,192 @@ +; Test replications of a byte-swapped scalar memory value. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s + +declare i16 @llvm.bswap.i16(i16) +declare i32 @llvm.bswap.i32(i32) +declare i64 @llvm.bswap.i64(i64) +declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>) +declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) +declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) + +; Test a v8i16 replicating load with no offset. +define <8 x i16> @f1(i16 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: vlbrreph %v24, 0(%r2) +; CHECK: br %r14 + %scalar = load i16, i16 *%ptr + %swap = call i16 @llvm.bswap.i16(i16 %scalar) + %val = insertelement <8 x i16> undef, i16 %swap, i32 0 + %ret = shufflevector <8 x i16> %val, <8 x i16> undef, + <8 x i32> zeroinitializer + ret <8 x i16> %ret +} + +; Test a v8i16 replicating load with the maximum in-range offset. +define <8 x i16> @f2(i16 *%base) { +; CHECK-LABEL: f2: +; CHECK: vlbrreph %v24, 4094(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%base, i64 2047 + %scalar = load i16, i16 *%ptr + %swap = call i16 @llvm.bswap.i16(i16 %scalar) + %val = insertelement <8 x i16> undef, i16 %swap, i32 0 + %ret = shufflevector <8 x i16> %val, <8 x i16> undef, + <8 x i32> zeroinitializer + ret <8 x i16> %ret +} + +; Test a v8i16 replicating load with the first out-of-range offset. +define <8 x i16> @f3(i16 *%base) { +; CHECK-LABEL: f3: +; CHECK: aghi %r2, 4096 +; CHECK: vlbrreph %v24, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%base, i64 2048 + %scalar = load i16, i16 *%ptr + %swap = call i16 @llvm.bswap.i16(i16 %scalar) + %val = insertelement <8 x i16> undef, i16 %swap, i32 0 + %ret = shufflevector <8 x i16> %val, <8 x i16> undef, + <8 x i32> zeroinitializer + ret <8 x i16> %ret +} + +; Test a v8i16 replicating load using a vector bswap. +define <8 x i16> @f4(i16 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: vlbrreph %v24, 0(%r2) +; CHECK: br %r14 + %scalar = load i16, i16 *%ptr + %val = insertelement <8 x i16> undef, i16 %scalar, i32 0 + %rep = shufflevector <8 x i16> %val, <8 x i16> undef, + <8 x i32> zeroinitializer + %ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %rep) + ret <8 x i16> %ret +} + +; Test a v4i32 replicating load with no offset. +define <4 x i32> @f5(i32 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: vlbrrepf %v24, 0(%r2) +; CHECK: br %r14 + %scalar = load i32, i32 *%ptr + %swap = call i32 @llvm.bswap.i32(i32 %scalar) + %val = insertelement <4 x i32> undef, i32 %swap, i32 0 + %ret = shufflevector <4 x i32> %val, <4 x i32> undef, + <4 x i32> zeroinitializer + ret <4 x i32> %ret +} + +; Test a v4i32 replicating load with the maximum in-range offset. 
+define <4 x i32> @f6(i32 *%base) { +; CHECK-LABEL: f6: +; CHECK: vlbrrepf %v24, 4092(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 1023 + %scalar = load i32, i32 *%ptr + %swap = call i32 @llvm.bswap.i32(i32 %scalar) + %val = insertelement <4 x i32> undef, i32 %swap, i32 0 + %ret = shufflevector <4 x i32> %val, <4 x i32> undef, + <4 x i32> zeroinitializer + ret <4 x i32> %ret +} + +; Test a v4i32 replicating load with the first out-of-range offset. +define <4 x i32> @f7(i32 *%base) { +; CHECK-LABEL: f7: +; CHECK: aghi %r2, 4096 +; CHECK: vlbrrepf %v24, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 1024 + %scalar = load i32, i32 *%ptr + %swap = call i32 @llvm.bswap.i32(i32 %scalar) + %val = insertelement <4 x i32> undef, i32 %swap, i32 0 + %ret = shufflevector <4 x i32> %val, <4 x i32> undef, + <4 x i32> zeroinitializer + ret <4 x i32> %ret +} + +; Test a v4i32 replicating load using a vector bswap. +define <4 x i32> @f8(i32 *%ptr) { +; CHECK-LABEL: f8: +; CHECK: vlbrrepf %v24, 0(%r2) +; CHECK: br %r14 + %scalar = load i32, i32 *%ptr + %val = insertelement <4 x i32> undef, i32 %scalar, i32 0 + %rep = shufflevector <4 x i32> %val, <4 x i32> undef, + <4 x i32> zeroinitializer + %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %rep) + ret <4 x i32> %ret +} + +; Test a v2i64 replicating load with no offset. +define <2 x i64> @f9(i64 *%ptr) { +; CHECK-LABEL: f9: +; CHECK: vlbrrepg %v24, 0(%r2) +; CHECK: br %r14 + %scalar = load i64, i64 *%ptr + %swap = call i64 @llvm.bswap.i64(i64 %scalar) + %val = insertelement <2 x i64> undef, i64 %swap, i32 0 + %ret = shufflevector <2 x i64> %val, <2 x i64> undef, + <2 x i32> zeroinitializer + ret <2 x i64> %ret +} + +; Test a v2i64 replicating load with the maximum in-range offset. +define <2 x i64> @f10(i64 *%base) { +; CHECK-LABEL: f10: +; CHECK: vlbrrepg %v24, 4088(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i32 511 + %scalar = load i64, i64 *%ptr + %swap = call i64 @llvm.bswap.i64(i64 %scalar) + %val = insertelement <2 x i64> undef, i64 %swap, i32 0 + %ret = shufflevector <2 x i64> %val, <2 x i64> undef, + <2 x i32> zeroinitializer + ret <2 x i64> %ret +} + +; Test a v2i64 replicating load with the first out-of-range offset. +define <2 x i64> @f11(i64 *%base) { +; CHECK-LABEL: f11: +; CHECK: aghi %r2, 4096 +; CHECK: vlbrrepg %v24, 0(%r2) +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i32 512 + %scalar = load i64, i64 *%ptr + %swap = call i64 @llvm.bswap.i64(i64 %scalar) + %val = insertelement <2 x i64> undef, i64 %swap, i32 0 + %ret = shufflevector <2 x i64> %val, <2 x i64> undef, + <2 x i32> zeroinitializer + ret <2 x i64> %ret +} + +; Test a v2i64 replicating load using a vector bswap. +define <2 x i64> @f12(i64 *%ptr) { +; CHECK-LABEL: f12: +; CHECK: vlbrrepg %v24, 0(%r2) +; CHECK: br %r14 + %scalar = load i64, i64 *%ptr + %val = insertelement <2 x i64> undef, i64 %scalar, i32 0 + %rep = shufflevector <2 x i64> %val, <2 x i64> undef, + <2 x i32> zeroinitializer + %ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %rep) + ret <2 x i64> %ret +} + +; Test a v8i16 replicating load with an index. 
+define <8 x i16> @f13(i16 *%base, i64 %index) { +; CHECK-LABEL: f13: +; CHECK: sllg [[REG:%r[1-5]]], %r3, 1 +; CHECK: vlbrreph %v24, 2046([[REG]],%r2) +; CHECK: br %r14 + %ptr1 = getelementptr i16, i16 *%base, i64 %index + %ptr = getelementptr i16, i16 *%ptr1, i64 1023 + %scalar = load i16, i16 *%ptr + %swap = call i16 @llvm.bswap.i16(i16 %scalar) + %val = insertelement <8 x i16> undef, i16 %swap, i32 0 + %ret = shufflevector <8 x i16> %val, <8 x i16> undef, + <8 x i32> zeroinitializer + ret <8 x i16> %ret +} + diff --git a/llvm/test/CodeGen/SystemZ/vec-conv-03.ll b/llvm/test/CodeGen/SystemZ/vec-conv-03.ll new file mode 100644 index 00000000000..8398876821c --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-conv-03.ll @@ -0,0 +1,40 @@ +; Test conversions between integer and float elements on arch13. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s + +; Test conversion of f32s to signed i32s. +define <4 x i32> @f1(<4 x float> %floats) { +; CHECK-LABEL: f1: +; CHECK: vcfeb %v24, %v24, 0, 5 +; CHECK: br %r14 + %dwords = fptosi <4 x float> %floats to <4 x i32> + ret <4 x i32> %dwords +} + +; Test conversion of f32s to unsigned i32s. +define <4 x i32> @f2(<4 x float> %floats) { +; CHECK-LABEL: f2: +; CHECK: vclfeb %v24, %v24, 0, 5 +; CHECK: br %r14 + %dwords = fptoui <4 x float> %floats to <4 x i32> + ret <4 x i32> %dwords +} + +; Test conversion of signed i32s to f32s. +define <4 x float> @f3(<4 x i32> %dwords) { +; CHECK-LABEL: f3: +; CHECK: vcefb %v24, %v24, 0, 0 +; CHECK: br %r14 + %floats = sitofp <4 x i32> %dwords to <4 x float> + ret <4 x float> %floats +} + +; Test conversion of unsigned i32s to f32s. +define <4 x float> @f4(<4 x i32> %dwords) { +; CHECK-LABEL: f4: +; CHECK: vcelfb %v24, %v24, 0, 0 +; CHECK: br %r14 + %floats = uitofp <4 x i32> %dwords to <4 x float> + ret <4 x float> %floats +} + diff --git a/llvm/test/CodeGen/SystemZ/vec-eswap-01.ll b/llvm/test/CodeGen/SystemZ/vec-eswap-01.ll new file mode 100644 index 00000000000..89b65392440 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-eswap-01.ll @@ -0,0 +1,138 @@ +; Test loads of element-swapped vector elements. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s + +; Test v16i8 loads. +define <16 x i8> @f1(<16 x i8> *%ptr) { +; CHECK-LABEL: f1: +; CHECK: vlbrq %v24, 0(%r2) +; CHECK: br %r14 + %load = load <16 x i8>, <16 x i8> *%ptr + %ret = shufflevector <16 x i8> %load, <16 x i8> undef, + <16 x i32> <i32 15, i32 14, i32 13, i32 12, + i32 11, i32 10, i32 9, i32 8, + i32 7, i32 6, i32 5, i32 4, + i32 3, i32 2, i32 1, i32 0> + ret <16 x i8> %ret +} + +; Test v8i16 loads. +define <8 x i16> @f2(<8 x i16> *%ptr) { +; CHECK-LABEL: f2: +; CHECK: vlerh %v24, 0(%r2) +; CHECK: br %r14 + %load = load <8 x i16>, <8 x i16> *%ptr + %ret = shufflevector <8 x i16> %load, <8 x i16> undef, + <8 x i32> <i32 7, i32 6, i32 5, i32 4, + i32 3, i32 2, i32 1, i32 0> + ret <8 x i16> %ret +} + +; Test v4i32 loads. +define <4 x i32> @f3(<4 x i32> *%ptr) { +; CHECK-LABEL: f3: +; CHECK: vlerf %v24, 0(%r2) +; CHECK: br %r14 + %load = load <4 x i32>, <4 x i32> *%ptr + %ret = shufflevector <4 x i32> %load, <4 x i32> undef, + <4 x i32> <i32 3, i32 2, i32 1, i32 0> + ret <4 x i32> %ret +} + +; Test v2i64 loads. +define <2 x i64> @f4(<2 x i64> *%ptr) { +; CHECK-LABEL: f4: +; CHECK: vlerg %v24, 0(%r2) +; CHECK: br %r14 + %load = load <2 x i64>, <2 x i64> *%ptr + %ret = shufflevector <2 x i64> %load, <2 x i64> undef, + <2 x i32> <i32 1, i32 0> + ret <2 x i64> %ret +} + +; Test v4f32 loads. 
+define <4 x float> @f5(<4 x float> *%ptr) {
+; CHECK-LABEL: f5:
+; CHECK: vlerf %v24, 0(%r2)
+; CHECK: br %r14
+  %load = load <4 x float>, <4 x float> *%ptr
+  %ret = shufflevector <4 x float> %load, <4 x float> undef,
+                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x float> %ret
+}
+
+; Test v2f64 loads.
+define <2 x double> @f6(<2 x double> *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vlerg %v24, 0(%r2)
+; CHECK: br %r14
+  %load = load <2 x double>, <2 x double> *%ptr
+  %ret = shufflevector <2 x double> %load, <2 x double> undef,
+                       <2 x i32> <i32 1, i32 0>
+  ret <2 x double> %ret
+}
+
+; Test the highest aligned in-range offset.
+define <4 x i32> @f7(<4 x i32> *%base) {
+; CHECK-LABEL: f7:
+; CHECK: vlerf %v24, 4080(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
+  %load = load <4 x i32>, <4 x i32> *%ptr
+  %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
+                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %ret
+}
+
+; Test the highest unaligned in-range offset.
+define <4 x i32> @f8(i8 *%base) {
+; CHECK-LABEL: f8:
+; CHECK: vlerf %v24, 4095(%r2)
+; CHECK: br %r14
+  %addr = getelementptr i8, i8 *%base, i64 4095
+  %ptr = bitcast i8 *%addr to <4 x i32> *
+  %load = load <4 x i32>, <4 x i32> *%ptr, align 1
+  %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
+                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %ret
+}
+
+; Test the next offset up, which requires separate address logic.
+define <4 x i32> @f9(<4 x i32> *%base) {
+; CHECK-LABEL: f9:
+; CHECK: aghi %r2, 4096
+; CHECK: vlerf %v24, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
+  %load = load <4 x i32>, <4 x i32> *%ptr
+  %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
+                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %ret
+}
+
+; Test negative offsets, which also require separate address logic.
+define <4 x i32> @f10(<4 x i32> *%base) {
+; CHECK-LABEL: f10:
+; CHECK: aghi %r2, -16
+; CHECK: vlerf %v24, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
+  %load = load <4 x i32>, <4 x i32> *%ptr
+  %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
+                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %ret
+}
+
+; Check that indexes are allowed.
+define <4 x i32> @f11(i8 *%base, i64 %index) {
+; CHECK-LABEL: f11:
+; CHECK: vlerf %v24, 0(%r3,%r2)
+; CHECK: br %r14
+  %addr = getelementptr i8, i8 *%base, i64 %index
+  %ptr = bitcast i8 *%addr to <4 x i32> *
+  %load = load <4 x i32>, <4 x i32> *%ptr, align 1
+  %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
+                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i32> %ret
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/vec-eswap-02.ll b/llvm/test/CodeGen/SystemZ/vec-eswap-02.ll
new file mode 100644
index 00000000000..1eab5a06ff2
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-eswap-02.ll
@@ -0,0 +1,138 @@
+; Test stores of element-swapped vector elements.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
+
+; Test v16i8 stores.
+define void @f1(<16 x i8> %val, <16 x i8> *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vstbrq %v24, 0(%r2)
+; CHECK: br %r14
+  %swap = shufflevector <16 x i8> %val, <16 x i8> undef,
+                        <16 x i32> <i32 15, i32 14, i32 13, i32 12,
+                                    i32 11, i32 10, i32 9, i32 8,
+                                    i32 7, i32 6, i32 5, i32 4,
+                                    i32 3, i32 2, i32 1, i32 0>
+  store <16 x i8> %swap, <16 x i8> *%ptr
+  ret void
+}
+
+; Test v8i16 stores.
+define void @f2(<8 x i16> %val, <8 x i16> *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: vsterh %v24, 0(%r2)
+; CHECK: br %r14
+  %swap = shufflevector <8 x i16> %val, <8 x i16> undef,
+                        <8 x i32> <i32 7, i32 6, i32 5, i32 4,
+                                   i32 3, i32 2, i32 1, i32 0>
+  store <8 x i16> %swap, <8 x i16> *%ptr
+  ret void
+}
+
+; Test v4i32 stores.
+define void @f3(<4 x i32> %val, <4 x i32> *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: vsterf %v24, 0(%r2)
+; CHECK: br %r14
+  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
+                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  store <4 x i32> %swap, <4 x i32> *%ptr
+  ret void
+}
+
+; Test v2i64 stores.
+define void @f4(<2 x i64> %val, <2 x i64> *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK: vsterg %v24, 0(%r2)
+; CHECK: br %r14
+  %swap = shufflevector <2 x i64> %val, <2 x i64> undef,
+                        <2 x i32> <i32 1, i32 0>
+  store <2 x i64> %swap, <2 x i64> *%ptr
+  ret void
+}
+
+; Test v4f32 stores.
+define void @f5(<4 x float> %val, <4 x float> *%ptr) {
+; CHECK-LABEL: f5:
+; CHECK: vsterf %v24, 0(%r2)
+; CHECK: br %r14
+  %swap = shufflevector <4 x float> %val, <4 x float> undef,
+                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  store <4 x float> %swap, <4 x float> *%ptr
+  ret void
+}
+
+; Test v2f64 stores.
+define void @f6(<2 x double> %val, <2 x double> *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vsterg %v24, 0(%r2)
+; CHECK: br %r14
+  %swap = shufflevector <2 x double> %val, <2 x double> undef,
+                        <2 x i32> <i32 1, i32 0>
+  store <2 x double> %swap, <2 x double> *%ptr
+  ret void
+}
+
+; Test the highest aligned in-range offset.
+define void @f7(<4 x i32> %val, <4 x i32> *%base) {
+; CHECK-LABEL: f7:
+; CHECK: vsterf %v24, 4080(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
+  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
+                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  store <4 x i32> %swap, <4 x i32> *%ptr
+  ret void
+}
+
+; Test the highest unaligned in-range offset.
+define void @f8(<4 x i32> %val, i8 *%base) {
+; CHECK-LABEL: f8:
+; CHECK: vsterf %v24, 4095(%r2)
+; CHECK: br %r14
+  %addr = getelementptr i8, i8 *%base, i64 4095
+  %ptr = bitcast i8 *%addr to <4 x i32> *
+  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
+                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  store <4 x i32> %swap, <4 x i32> *%ptr, align 1
+  ret void
+}
+
+; Test the next offset up, which requires separate address logic.
+define void @f9(<4 x i32> %val, <4 x i32> *%base) {
+; CHECK-LABEL: f9:
+; CHECK: aghi %r2, 4096
+; CHECK: vsterf %v24, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
+  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
+                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  store <4 x i32> %swap, <4 x i32> *%ptr
+  ret void
+}
+
+; Test negative offsets, which also require separate address logic.
+define void @f10(<4 x i32> %val, <4 x i32> *%base) {
+; CHECK-LABEL: f10:
+; CHECK: aghi %r2, -16
+; CHECK: vsterf %v24, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
+  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
+                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  store <4 x i32> %swap, <4 x i32> *%ptr
+  ret void
+}
+
+; Check that indexes are allowed.
+define void @f11(<4 x i32> %val, i8 *%base, i64 %index) {
+; CHECK-LABEL: f11:
+; CHECK: vsterf %v24, 0(%r3,%r2)
+; CHECK: br %r14
+  %addr = getelementptr i8, i8 *%base, i64 %index
+  %ptr = bitcast i8 *%addr to <4 x i32> *
+  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
+                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  store <4 x i32> %swap, <4 x i32> *%ptr, align 1
+  ret void
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/vec-intrinsics-03.ll b/llvm/test/CodeGen/SystemZ/vec-intrinsics-03.ll
new file mode 100644
index 00000000000..d192ad89488
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-intrinsics-03.ll
@@ -0,0 +1,154 @@
+; Test vector intrinsics added with arch13.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
+
+declare <16 x i8> @llvm.s390.vsld(<16 x i8>, <16 x i8>, i32)
+declare <16 x i8> @llvm.s390.vsrd(<16 x i8>, <16 x i8>, i32)
+
+declare {<16 x i8>, i32} @llvm.s390.vstrsb(<16 x i8>, <16 x i8>, <16 x i8>)
+declare {<16 x i8>, i32} @llvm.s390.vstrsh(<8 x i16>, <8 x i16>, <16 x i8>)
+declare {<16 x i8>, i32} @llvm.s390.vstrsf(<4 x i32>, <4 x i32>, <16 x i8>)
+declare {<16 x i8>, i32} @llvm.s390.vstrszb(<16 x i8>, <16 x i8>, <16 x i8>)
+declare {<16 x i8>, i32} @llvm.s390.vstrszh(<8 x i16>, <8 x i16>, <16 x i8>)
+declare {<16 x i8>, i32} @llvm.s390.vstrszf(<4 x i32>, <4 x i32>, <16 x i8>)
+
+
+; VSLD with the minimum useful value.
+define <16 x i8> @test_vsld_1(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsld_1:
+; CHECK: vsld %v24, %v24, %v26, 1
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vsld(<16 x i8> %a, <16 x i8> %b, i32 1)
+  ret <16 x i8> %res
+}
+
+; VSLD with the maximum value.
+define <16 x i8> @test_vsld_7(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsld_7:
+; CHECK: vsld %v24, %v24, %v26, 7
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vsld(<16 x i8> %a, <16 x i8> %b, i32 7)
+  ret <16 x i8> %res
+}
+
+; VSRD with the minimum useful value.
+define <16 x i8> @test_vsrd_1(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsrd_1:
+; CHECK: vsrd %v24, %v24, %v26, 1
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vsrd(<16 x i8> %a, <16 x i8> %b, i32 1)
+  ret <16 x i8> %res
+}
+
+; VSRD with the maximum value.
+define <16 x i8> @test_vsrd_7(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsrd_7:
+; CHECK: vsrd %v24, %v24, %v26, 7
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vsrd(<16 x i8> %a, <16 x i8> %b, i32 7)
+  ret <16 x i8> %res
+}
+
+
+; VSTRSB.
+define <16 x i8> @test_vstrsb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,
+                              i32 *%ccptr) {
+; CHECK-LABEL: test_vstrsb:
+; CHECK: vstrsb %v24, %v24, %v26, %v28, 0
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vstrsb(<16 x i8> %a, <16 x i8> %b,
+                                                  <16 x i8> %c)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <16 x i8> %res
+}
+
+; VSTRSH.
+define <16 x i8> @test_vstrsh(<8 x i16> %a, <8 x i16> %b, <16 x i8> %c,
+                              i32 *%ccptr) {
+; CHECK-LABEL: test_vstrsh:
+; CHECK: vstrsh %v24, %v24, %v26, %v28, 0
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vstrsh(<8 x i16> %a, <8 x i16> %b,
+                                                  <16 x i8> %c)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <16 x i8> %res
+}
+
+; VSTRSF.
+define <16 x i8> @test_vstrsf(<4 x i32> %a, <4 x i32> %b, <16 x i8> %c, + i32 *%ccptr) { +; CHECK-LABEL: test_vstrsf: +; CHECK: vstrsf %v24, %v24, %v26, %v28, 0 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vstrsf(<4 x i32> %a, <4 x i32> %b, + <16 x i8> %c) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <16 x i8> %res +} + +; VSTRSZB. +define <16 x i8> @test_vstrszb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, + i32 *%ccptr) { +; CHECK-LABEL: test_vstrszb: +; CHECK: vstrszb %v24, %v24, %v26, %v28 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vstrszb(<16 x i8> %a, <16 x i8> %b, + <16 x i8> %c) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <16 x i8> %res +} + +; VSTRSZH. +define <16 x i8> @test_vstrszh(<8 x i16> %a, <8 x i16> %b, <16 x i8> %c, + i32 *%ccptr) { +; CHECK-LABEL: test_vstrszh: +; CHECK: vstrszh %v24, %v24, %v26, %v28 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vstrszh(<8 x i16> %a, <8 x i16> %b, + <16 x i8> %c) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <16 x i8> %res +} + +; VSTRSZF. +define <16 x i8> @test_vstrszf(<4 x i32> %a, <4 x i32> %b, <16 x i8> %c, + i32 *%ccptr) { +; CHECK-LABEL: test_vstrszf: +; CHECK: vstrszf %v24, %v24, %v26, %v28 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: srl [[REG]], 28 +; CHECK: st [[REG]], 0(%r2) +; CHECK: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vstrszf(<4 x i32> %a, <4 x i32> %b, + <16 x i8> %c) + %res = extractvalue {<16 x i8>, i32} %call, 0 + %cc = extractvalue {<16 x i8>, i32} %call, 1 + store i32 %cc, i32 *%ccptr + ret <16 x i8> %res +} + diff --git a/llvm/test/MC/Disassembler/SystemZ/insns-arch13.txt b/llvm/test/MC/Disassembler/SystemZ/insns-arch13.txt new file mode 100644 index 00000000000..3ed0435510e --- /dev/null +++ b/llvm/test/MC/Disassembler/SystemZ/insns-arch13.txt @@ -0,0 +1,1479 @@ +# Test arch13 instructions that don't have PC-relative operands. 
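+#
+# Each test pairs an expected disassembly (the "CHECK:" line) with the raw
+# instruction bytes on the line below it: llvm-mc decodes the bytes and
+# FileCheck matches the result.  In "0xb9 0x39 0x20 0x22", for example, the
+# opcode bytes 0xb9 0x39 select dfltcc and the remaining nibbles encode the
+# register operands, giving "dfltcc %r2, %r2, %r2".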
+# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu -mcpu=arch13 \ +# RUN: | FileCheck %s + +# CHECK: dfltcc %r2, %r2, %r2 +0xb9 0x39 0x20 0x22 + +# CHECK: dfltcc %r2, %r8, %r15 +0xb9 0x39 0xf0 0x28 + +# CHECK: dfltcc %r14, %r8, %r2 +0xb9 0x39 0x20 0xe8 + +# CHECK: dfltcc %r6, %r8, %r10 +0xb9 0x39 0xa0 0x68 + +# CHECK: kdsa %r0, %r2 +0xb9 0x3a 0x00 0x02 + +# CHECK: kdsa %r0, %r14 +0xb9 0x3a 0x00 0x0e + +# CHECK: kdsa %r15, %r2 +0xb9 0x3a 0x00 0xf2 + +# CHECK: kdsa %r7, %r10 +0xb9 0x3a 0x00 0x7a + +# CHECK: mvcrl 0, 0 +0xe5 0x0a 0x00 0x00 0x00 0x00 + +# CHECK: mvcrl 0(%r1), 0(%r2) +0xe5 0x0a 0x10 0x00 0x20 0x00 + +# CHECK: mvcrl 160(%r1), 320(%r15) +0xe5 0x0a 0x10 0xa0 0xf1 0x40 + +# CHECK: mvcrl 0(%r1), 4095 +0xe5 0x0a 0x10 0x00 0x0f 0xff + +# CHECK: mvcrl 0(%r1), 4095(%r2) +0xe5 0x0a 0x10 0x00 0x2f 0xff + +# CHECK: mvcrl 0(%r1), 4095(%r15) +0xe5 0x0a 0x10 0x00 0xff 0xff + +# CHECK: mvcrl 0(%r1), 0 +0xe5 0x0a 0x10 0x00 0x00 0x00 + +# CHECK: mvcrl 0(%r15), 0 +0xe5 0x0a 0xf0 0x00 0x00 0x00 + +# CHECK: mvcrl 4095(%r1), 0 +0xe5 0x0a 0x1f 0xff 0x00 0x00 + +# CHECK: mvcrl 4095(%r15), 0 +0xe5 0x0a 0xff 0xff 0x00 0x00 + +# CHECK: ncgrk %r0, %r0, %r0 +0xb9 0xe5 0x00 0x00 + +# CHECK: ncgrk %r0, %r0, %r15 +0xb9 0xe5 0xf0 0x00 + +# CHECK: ncgrk %r0, %r15, %r0 +0xb9 0xe5 0x00 0x0f + +# CHECK: ncgrk %r15, %r0, %r0 +0xb9 0xe5 0x00 0xf0 + +# CHECK: ncgrk %r7, %r8, %r9 +0xb9 0xe5 0x90 0x78 + +# CHECK: ncrk %r0, %r0, %r0 +0xb9 0xf5 0x00 0x00 + +# CHECK: ncrk %r0, %r0, %r15 +0xb9 0xf5 0xf0 0x00 + +# CHECK: ncrk %r0, %r15, %r0 +0xb9 0xf5 0x00 0x0f + +# CHECK: ncrk %r15, %r0, %r0 +0xb9 0xf5 0x00 0xf0 + +# CHECK: ncrk %r7, %r8, %r9 +0xb9 0xf5 0x90 0x78 + +# CHECK: nngrk %r0, %r0, %r0 +0xb9 0x64 0x00 0x00 + +# CHECK: nngrk %r0, %r0, %r15 +0xb9 0x64 0xf0 0x00 + +# CHECK: nngrk %r0, %r15, %r0 +0xb9 0x64 0x00 0x0f + +# CHECK: nngrk %r15, %r0, %r0 +0xb9 0x64 0x00 0xf0 + +# CHECK: nngrk %r7, %r8, %r9 +0xb9 0x64 0x90 0x78 + +# CHECK: nnrk %r0, %r0, %r0 +0xb9 0x74 0x00 0x00 + +# CHECK: nnrk %r0, %r0, %r15 +0xb9 0x74 0xf0 0x00 + +# CHECK: nnrk %r0, %r15, %r0 +0xb9 0x74 0x00 0x0f + +# CHECK: nnrk %r15, %r0, %r0 +0xb9 0x74 0x00 0xf0 + +# CHECK: nnrk %r7, %r8, %r9 +0xb9 0x74 0x90 0x78 + +# CHECK: nogrk %r0, %r0, %r0 +0xb9 0x66 0x00 0x00 + +# CHECK: nogrk %r0, %r0, %r15 +0xb9 0x66 0xf0 0x00 + +# CHECK: nogrk %r0, %r15, %r0 +0xb9 0x66 0x00 0x0f + +# CHECK: nogrk %r15, %r0, %r0 +0xb9 0x66 0x00 0xf0 + +# CHECK: nogrk %r7, %r8, %r9 +0xb9 0x66 0x90 0x78 + +# CHECK: nork %r0, %r0, %r0 +0xb9 0x76 0x00 0x00 + +# CHECK: nork %r0, %r0, %r15 +0xb9 0x76 0xf0 0x00 + +# CHECK: nork %r0, %r15, %r0 +0xb9 0x76 0x00 0x0f + +# CHECK: nork %r15, %r0, %r0 +0xb9 0x76 0x00 0xf0 + +# CHECK: nork %r7, %r8, %r9 +0xb9 0x76 0x90 0x78 + +# CHECK: nxgrk %r0, %r0, %r0 +0xb9 0x67 0x00 0x00 + +# CHECK: nxgrk %r0, %r0, %r15 +0xb9 0x67 0xf0 0x00 + +# CHECK: nxgrk %r0, %r15, %r0 +0xb9 0x67 0x00 0x0f + +# CHECK: nxgrk %r15, %r0, %r0 +0xb9 0x67 0x00 0xf0 + +# CHECK: nxgrk %r7, %r8, %r9 +0xb9 0x67 0x90 0x78 + +# CHECK: nxrk %r0, %r0, %r0 +0xb9 0x77 0x00 0x00 + +# CHECK: nxrk %r0, %r0, %r15 +0xb9 0x77 0xf0 0x00 + +# CHECK: nxrk %r0, %r15, %r0 +0xb9 0x77 0x00 0x0f + +# CHECK: nxrk %r15, %r0, %r0 +0xb9 0x77 0x00 0xf0 + +# CHECK: nxrk %r7, %r8, %r9 +0xb9 0x77 0x90 0x78 + +# CHECK: ocgrk %r0, %r0, %r0 +0xb9 0x65 0x00 0x00 + +# CHECK: ocgrk %r0, %r0, %r15 +0xb9 0x65 0xf0 0x00 + +# CHECK: ocgrk %r0, %r15, %r0 +0xb9 0x65 0x00 0x0f + +# CHECK: ocgrk %r15, %r0, %r0 +0xb9 0x65 0x00 0xf0 + +# CHECK: ocgrk %r7, %r8, %r9 +0xb9 0x65 0x90 0x78 + +# CHECK: ocrk %r0, %r0, 
%r0 +0xb9 0x75 0x00 0x00 + +# CHECK: ocrk %r0, %r0, %r15 +0xb9 0x75 0xf0 0x00 + +# CHECK: ocrk %r0, %r15, %r0 +0xb9 0x75 0x00 0x0f + +# CHECK: ocrk %r15, %r0, %r0 +0xb9 0x75 0x00 0xf0 + +# CHECK: ocrk %r7, %r8, %r9 +0xb9 0x75 0x90 0x78 + +# CHECK: popcnt %r0, %r0 +0xb9 0xe1 0x00 0x00 + +# CHECK: popcnt %r0, %r15 +0xb9 0xe1 0x00 0x0f + +# CHECK: popcnt %r14, %r0 +0xb9 0xe1 0x00 0xe0 + +# CHECK: popcnt %r6, %r8 +0xb9 0xe1 0x00 0x68 + +# CHECK: popcnt %r4, %r13, 1 +0xb9 0xe1 0x10 0x4d + +# CHECK: popcnt %r4, %r13, 15 +0xb9 0xe1 0xf0 0x4d + +# CHECK: selgr %r0, %r0, %r0, 0 +0xb9 0xe3 0x00 0x00 + +# CHECK: selgr %r0, %r0, %r0, 15 +0xb9 0xe3 0x0f 0x00 + +# CHECK: selgr %r0, %r0, %r15, 0 +0xb9 0xe3 0xf0 0x00 + +# CHECK: selgr %r0, %r15, %r0, 0 +0xb9 0xe3 0x00 0x0f + +# CHECK: selgr %r15, %r0, %r0, 0 +0xb9 0xe3 0x00 0xf0 + +# CHECK: selgro %r1, %r2, %r3 +0xb9 0xe3 0x31 0x12 + +# CHECK: selgrh %r1, %r2, %r3 +0xb9 0xe3 0x32 0x12 + +# CHECK: selgrnle %r1, %r2, %r3 +0xb9 0xe3 0x33 0x12 + +# CHECK: selgrl %r1, %r2, %r3 +0xb9 0xe3 0x34 0x12 + +# CHECK: selgrnhe %r1, %r2, %r3 +0xb9 0xe3 0x35 0x12 + +# CHECK: selgrlh %r1, %r2, %r3 +0xb9 0xe3 0x36 0x12 + +# CHECK: selgrne %r1, %r2, %r3 +0xb9 0xe3 0x37 0x12 + +# CHECK: selgre %r1, %r2, %r3 +0xb9 0xe3 0x38 0x12 + +# CHECK: selgrnlh %r1, %r2, %r3 +0xb9 0xe3 0x39 0x12 + +# CHECK: selgrhe %r1, %r2, %r3 +0xb9 0xe3 0x3a 0x12 + +# CHECK: selgrnl %r1, %r2, %r3 +0xb9 0xe3 0x3b 0x12 + +# CHECK: selgrle %r1, %r2, %r3 +0xb9 0xe3 0x3c 0x12 + +# CHECK: selgrnh %r1, %r2, %r3 +0xb9 0xe3 0x3d 0x12 + +# CHECK: selgrno %r1, %r2, %r3 +0xb9 0xe3 0x3e 0x12 + +# CHECK: selfhr %r0, %r0, %r0, 0 +0xb9 0xc0 0x00 0x00 + +# CHECK: selfhr %r0, %r0, %r0, 15 +0xb9 0xc0 0x0f 0x00 + +# CHECK: selfhr %r0, %r0, %r15, 0 +0xb9 0xc0 0xf0 0x00 + +# CHECK: selfhr %r0, %r15, %r0, 0 +0xb9 0xc0 0x00 0x0f + +# CHECK: selfhr %r15, %r0, %r0, 0 +0xb9 0xc0 0x00 0xf0 + +# CHECK: selfhro %r1, %r2, %r3 +0xb9 0xc0 0x31 0x12 + +# CHECK: selfhrh %r1, %r2, %r3 +0xb9 0xc0 0x32 0x12 + +# CHECK: selfhrnle %r1, %r2, %r3 +0xb9 0xc0 0x33 0x12 + +# CHECK: selfhrl %r1, %r2, %r3 +0xb9 0xc0 0x34 0x12 + +# CHECK: selfhrnhe %r1, %r2, %r3 +0xb9 0xc0 0x35 0x12 + +# CHECK: selfhrlh %r1, %r2, %r3 +0xb9 0xc0 0x36 0x12 + +# CHECK: selfhrne %r1, %r2, %r3 +0xb9 0xc0 0x37 0x12 + +# CHECK: selfhre %r1, %r2, %r3 +0xb9 0xc0 0x38 0x12 + +# CHECK: selfhrnlh %r1, %r2, %r3 +0xb9 0xc0 0x39 0x12 + +# CHECK: selfhrhe %r1, %r2, %r3 +0xb9 0xc0 0x3a 0x12 + +# CHECK: selfhrnl %r1, %r2, %r3 +0xb9 0xc0 0x3b 0x12 + +# CHECK: selfhrle %r1, %r2, %r3 +0xb9 0xc0 0x3c 0x12 + +# CHECK: selfhrnh %r1, %r2, %r3 +0xb9 0xc0 0x3d 0x12 + +# CHECK: selfhrno %r1, %r2, %r3 +0xb9 0xc0 0x3e 0x12 + +# CHECK: selr %r0, %r0, %r0, 0 +0xb9 0xf0 0x00 0x00 + +# CHECK: selr %r0, %r0, %r0, 15 +0xb9 0xf0 0x0f 0x00 + +# CHECK: selr %r0, %r0, %r15, 0 +0xb9 0xf0 0xf0 0x00 + +# CHECK: selr %r0, %r15, %r0, 0 +0xb9 0xf0 0x00 0x0f + +# CHECK: selr %r15, %r0, %r0, 0 +0xb9 0xf0 0x00 0xf0 + +# CHECK: selro %r1, %r2, %r3 +0xb9 0xf0 0x31 0x12 + +# CHECK: selrh %r1, %r2, %r3 +0xb9 0xf0 0x32 0x12 + +# CHECK: selrnle %r1, %r2, %r3 +0xb9 0xf0 0x33 0x12 + +# CHECK: selrl %r1, %r2, %r3 +0xb9 0xf0 0x34 0x12 + +# CHECK: selrnhe %r1, %r2, %r3 +0xb9 0xf0 0x35 0x12 + +# CHECK: selrlh %r1, %r2, %r3 +0xb9 0xf0 0x36 0x12 + +# CHECK: selrne %r1, %r2, %r3 +0xb9 0xf0 0x37 0x12 + +# CHECK: selre %r1, %r2, %r3 +0xb9 0xf0 0x38 0x12 + +# CHECK: selrnlh %r1, %r2, %r3 +0xb9 0xf0 0x39 0x12 + +# CHECK: selrhe %r1, %r2, %r3 +0xb9 0xf0 0x3a 0x12 + +# CHECK: selrnl %r1, %r2, %r3 +0xb9 0xf0 0x3b 0x12 + +# CHECK: 
selrle %r1, %r2, %r3 +0xb9 0xf0 0x3c 0x12 + +# CHECK: selrnh %r1, %r2, %r3 +0xb9 0xf0 0x3d 0x12 + +# CHECK: selrno %r1, %r2, %r3 +0xb9 0xf0 0x3e 0x12 + +# CHECK: sortl %r2, %r2 +0xb9 0x38 0x00 0x22 + +# CHECK: sortl %r2, %r14 +0xb9 0x38 0x00 0x2e + +# CHECK: sortl %r14, %r2 +0xb9 0x38 0x00 0xe2 + +# CHECK: sortl %r6, %r10 +0xb9 0x38 0x00 0x6a + +# CHECK: vcefb %v0, %v0, 0, 0 +0xe7 0x00 0x00 0x00 0x20 0xc3 + +# CHECK: vcefb %v0, %v0, 0, 15 +0xe7 0x00 0x00 0xf0 0x20 0xc3 + +# CHECK: vcefb %v0, %v0, 4, 0 +0xe7 0x00 0x00 0x04 0x20 0xc3 + +# CHECK: vcefb %v0, %v31, 0, 0 +0xe7 0x0f 0x00 0x00 0x24 0xc3 + +# CHECK: vcefb %v31, %v0, 0, 0 +0xe7 0xf0 0x00 0x00 0x28 0xc3 + +# CHECK: vcefb %v14, %v17, 4, 10 +0xe7 0xe1 0x00 0xa4 0x24 0xc3 + +# CHECK: vcelfb %v0, %v0, 0, 0 +0xe7 0x00 0x00 0x00 0x20 0xc1 + +# CHECK: vcelfb %v0, %v0, 0, 15 +0xe7 0x00 0x00 0xf0 0x20 0xc1 + +# CHECK: vcelfb %v0, %v0, 4, 0 +0xe7 0x00 0x00 0x04 0x20 0xc1 + +# CHECK: vcelfb %v0, %v31, 0, 0 +0xe7 0x0f 0x00 0x00 0x24 0xc1 + +# CHECK: vcelfb %v31, %v0, 0, 0 +0xe7 0xf0 0x00 0x00 0x28 0xc1 + +# CHECK: vcelfb %v14, %v17, 4, 10 +0xe7 0xe1 0x00 0xa4 0x24 0xc1 + +# CHECK: vcfeb %v0, %v0, 0, 0 +0xe7 0x00 0x00 0x00 0x20 0xc2 + +# CHECK: vcfeb %v0, %v0, 0, 15 +0xe7 0x00 0x00 0xf0 0x20 0xc2 + +# CHECK: vcfeb %v0, %v0, 4, 0 +0xe7 0x00 0x00 0x04 0x20 0xc2 + +# CHECK: vcfeb %v0, %v31, 0, 0 +0xe7 0x0f 0x00 0x00 0x24 0xc2 + +# CHECK: vcfeb %v31, %v0, 0, 0 +0xe7 0xf0 0x00 0x00 0x28 0xc2 + +# CHECK: vcfeb %v14, %v17, 4, 10 +0xe7 0xe1 0x00 0xa4 0x24 0xc2 + +# CHECK: vclfeb %v0, %v0, 0, 0 +0xe7 0x00 0x00 0x00 0x20 0xc0 + +# CHECK: vclfeb %v0, %v0, 0, 15 +0xe7 0x00 0x00 0xf0 0x20 0xc0 + +# CHECK: vclfeb %v0, %v0, 4, 0 +0xe7 0x00 0x00 0x04 0x20 0xc0 + +# CHECK: vclfeb %v0, %v31, 0, 0 +0xe7 0x0f 0x00 0x00 0x24 0xc0 + +# CHECK: vclfeb %v31, %v0, 0, 0 +0xe7 0xf0 0x00 0x00 0x28 0xc0 + +# CHECK: vclfeb %v14, %v17, 4, 10 +0xe7 0xe1 0x00 0xa4 0x24 0xc0 + +# CHECK: vcvb %r0, %v0, 0, 15 +0xe6 0x00 0x00 0x0f 0x00 0x50 + +# CHECK: vcvb %r3, %v18, 4, 6 +0xe6 0x32 0x00 0x46 0x04 0x50 + +# CHECK: vcvbg %r0, %v0, 0, 15 +0xe6 0x00 0x00 0x0f 0x00 0x52 + +# CHECK: vcvbg %r3, %v18, 4, 6 +0xe6 0x32 0x00 0x46 0x04 0x52 + +# CHECK: vlbr %v0, 0, 0 +0xe6 0x00 0x00 0x00 0x00 0x06 + +# CHECK: vlbr %v0, 0, 15 +0xe6 0x00 0x00 0x00 0xf0 0x06 + +# CHECK: vlbr %v0, 4095, 0 +0xe6 0x00 0x0f 0xff 0x00 0x06 + +# CHECK: vlbr %v0, 0(%r15), 0 +0xe6 0x00 0xf0 0x00 0x00 0x06 + +# CHECK: vlbr %v0, 0(%r15,%r1), 0 +0xe6 0x0f 0x10 0x00 0x00 0x06 + +# CHECK: vlbr %v15, 0, 0 +0xe6 0xf0 0x00 0x00 0x00 0x06 + +# CHECK: vlbr %v31, 0, 0 +0xe6 0xf0 0x00 0x00 0x08 0x06 + +# CHECK: vlbr %v18, 1383(%r3,%r4), 11 +0xe6 0x23 0x45 0x67 0xb8 0x06 + +# CHECK: vlbrf %v0, 0 +0xe6 0x00 0x00 0x00 0x20 0x06 + +# CHECK: vlbrf %v0, 4095 +0xe6 0x00 0x0f 0xff 0x20 0x06 + +# CHECK: vlbrf %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x20 0x06 + +# CHECK: vlbrf %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x20 0x06 + +# CHECK: vlbrf %v15, 0 +0xe6 0xf0 0x00 0x00 0x20 0x06 + +# CHECK: vlbrf %v31, 0 +0xe6 0xf0 0x00 0x00 0x28 0x06 + +# CHECK: vlbrf %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x28 0x06 + +# CHECK: vlbrg %v0, 0 +0xe6 0x00 0x00 0x00 0x30 0x06 + +# CHECK: vlbrg %v0, 4095 +0xe6 0x00 0x0f 0xff 0x30 0x06 + +# CHECK: vlbrg %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x30 0x06 + +# CHECK: vlbrg %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x30 0x06 + +# CHECK: vlbrg %v15, 0 +0xe6 0xf0 0x00 0x00 0x30 0x06 + +# CHECK: vlbrg %v31, 0 +0xe6 0xf0 0x00 0x00 0x38 0x06 + +# CHECK: vlbrg %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x38 0x06 + +# CHECK: vlbrh %v0, 0 
+0xe6 0x00 0x00 0x00 0x10 0x06 + +# CHECK: vlbrh %v0, 4095 +0xe6 0x00 0x0f 0xff 0x10 0x06 + +# CHECK: vlbrh %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x10 0x06 + +# CHECK: vlbrh %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x10 0x06 + +# CHECK: vlbrh %v15, 0 +0xe6 0xf0 0x00 0x00 0x10 0x06 + +# CHECK: vlbrh %v31, 0 +0xe6 0xf0 0x00 0x00 0x18 0x06 + +# CHECK: vlbrh %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x18 0x06 + +# CHECK: vlbrq %v0, 0 +0xe6 0x00 0x00 0x00 0x40 0x06 + +# CHECK: vlbrq %v0, 4095 +0xe6 0x00 0x0f 0xff 0x40 0x06 + +# CHECK: vlbrq %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x40 0x06 + +# CHECK: vlbrq %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x40 0x06 + +# CHECK: vlbrq %v15, 0 +0xe6 0xf0 0x00 0x00 0x40 0x06 + +# CHECK: vlbrq %v31, 0 +0xe6 0xf0 0x00 0x00 0x48 0x06 + +# CHECK: vlbrq %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x48 0x06 + +# CHECK: vlbrrep %v0, 0, 0 +0xe6 0x00 0x00 0x00 0x00 0x05 + +# CHECK: vlbrrep %v0, 0, 15 +0xe6 0x00 0x00 0x00 0xf0 0x05 + +# CHECK: vlbrrep %v0, 4095, 0 +0xe6 0x00 0x0f 0xff 0x00 0x05 + +# CHECK: vlbrrep %v0, 0(%r15), 0 +0xe6 0x00 0xf0 0x00 0x00 0x05 + +# CHECK: vlbrrep %v0, 0(%r15,%r1), 0 +0xe6 0x0f 0x10 0x00 0x00 0x05 + +# CHECK: vlbrrep %v15, 0, 0 +0xe6 0xf0 0x00 0x00 0x00 0x05 + +# CHECK: vlbrrep %v31, 0, 0 +0xe6 0xf0 0x00 0x00 0x08 0x05 + +# CHECK: vlbrrep %v18, 1383(%r3,%r4), 11 +0xe6 0x23 0x45 0x67 0xb8 0x05 + +# CHECK: vlbrrepf %v0, 0 +0xe6 0x00 0x00 0x00 0x20 0x05 + +# CHECK: vlbrrepf %v0, 4095 +0xe6 0x00 0x0f 0xff 0x20 0x05 + +# CHECK: vlbrrepf %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x20 0x05 + +# CHECK: vlbrrepf %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x20 0x05 + +# CHECK: vlbrrepf %v15, 0 +0xe6 0xf0 0x00 0x00 0x20 0x05 + +# CHECK: vlbrrepf %v31, 0 +0xe6 0xf0 0x00 0x00 0x28 0x05 + +# CHECK: vlbrrepf %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x28 0x05 + +# CHECK: vlbrrepg %v0, 0 +0xe6 0x00 0x00 0x00 0x30 0x05 + +# CHECK: vlbrrepg %v0, 4095 +0xe6 0x00 0x0f 0xff 0x30 0x05 + +# CHECK: vlbrrepg %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x30 0x05 + +# CHECK: vlbrrepg %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x30 0x05 + +# CHECK: vlbrrepg %v15, 0 +0xe6 0xf0 0x00 0x00 0x30 0x05 + +# CHECK: vlbrrepg %v31, 0 +0xe6 0xf0 0x00 0x00 0x38 0x05 + +# CHECK: vlbrrepg %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x38 0x05 + +# CHECK: vlbrreph %v0, 0 +0xe6 0x00 0x00 0x00 0x10 0x05 + +# CHECK: vlbrreph %v0, 4095 +0xe6 0x00 0x0f 0xff 0x10 0x05 + +# CHECK: vlbrreph %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x10 0x05 + +# CHECK: vlbrreph %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x10 0x05 + +# CHECK: vlbrreph %v15, 0 +0xe6 0xf0 0x00 0x00 0x10 0x05 + +# CHECK: vlbrreph %v31, 0 +0xe6 0xf0 0x00 0x00 0x18 0x05 + +# CHECK: vlbrreph %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x18 0x05 + +# CHECK: vlebrf %v0, 0, 0 +0xe6 0x00 0x00 0x00 0x00 0x03 + +# CHECK: vlebrf %v0, 0, 3 +0xe6 0x00 0x00 0x00 0x30 0x03 + +# CHECK: vlebrf %v0, 4095, 0 +0xe6 0x00 0x0f 0xff 0x00 0x03 + +# CHECK: vlebrf %v0, 0(%r15), 0 +0xe6 0x00 0xf0 0x00 0x00 0x03 + +# CHECK: vlebrf %v0, 0(%r15,%r1), 0 +0xe6 0x0f 0x10 0x00 0x00 0x03 + +# CHECK: vlebrf %v15, 0, 0 +0xe6 0xf0 0x00 0x00 0x00 0x03 + +# CHECK: vlebrf %v31, 0, 0 +0xe6 0xf0 0x00 0x00 0x08 0x03 + +# CHECK: vlebrf %v18, 1383(%r3,%r4), 2 +0xe6 0x23 0x45 0x67 0x28 0x03 + +# CHECK: vlebrg %v0, 0, 0 +0xe6 0x00 0x00 0x00 0x00 0x02 + +# CHECK: vlebrg %v0, 0, 1 +0xe6 0x00 0x00 0x00 0x10 0x02 + +# CHECK: vlebrg %v0, 4095, 0 +0xe6 0x00 0x0f 0xff 0x00 0x02 + +# CHECK: vlebrg %v0, 0(%r15), 0 +0xe6 0x00 0xf0 0x00 0x00 0x02 + +# CHECK: vlebrg %v0, 0(%r15,%r1), 0 +0xe6 0x0f 0x10 0x00 0x00 0x02 + +# CHECK: vlebrg %v15, 0, 0 
+0xe6 0xf0 0x00 0x00 0x00 0x02 + +# CHECK: vlebrg %v31, 0, 0 +0xe6 0xf0 0x00 0x00 0x08 0x02 + +# CHECK: vlebrg %v18, 1383(%r3,%r4), 1 +0xe6 0x23 0x45 0x67 0x18 0x02 + +# CHECK: vlebrh %v0, 0, 0 +0xe6 0x00 0x00 0x00 0x00 0x01 + +# CHECK: vlebrh %v0, 0, 7 +0xe6 0x00 0x00 0x00 0x70 0x01 + +# CHECK: vlebrh %v0, 4095, 0 +0xe6 0x00 0x0f 0xff 0x00 0x01 + +# CHECK: vlebrh %v0, 0(%r15), 0 +0xe6 0x00 0xf0 0x00 0x00 0x01 + +# CHECK: vlebrh %v0, 0(%r15,%r1), 0 +0xe6 0x0f 0x10 0x00 0x00 0x01 + +# CHECK: vlebrh %v15, 0, 0 +0xe6 0xf0 0x00 0x00 0x00 0x01 + +# CHECK: vlebrh %v31, 0, 0 +0xe6 0xf0 0x00 0x00 0x08 0x01 + +# CHECK: vlebrh %v18, 1383(%r3,%r4), 4 +0xe6 0x23 0x45 0x67 0x48 0x01 + +# CHECK: vler %v0, 0, 0 +0xe6 0x00 0x00 0x00 0x00 0x07 + +# CHECK: vler %v0, 0, 15 +0xe6 0x00 0x00 0x00 0xf0 0x07 + +# CHECK: vler %v0, 4095, 0 +0xe6 0x00 0x0f 0xff 0x00 0x07 + +# CHECK: vler %v0, 0(%r15), 0 +0xe6 0x00 0xf0 0x00 0x00 0x07 + +# CHECK: vler %v0, 0(%r15,%r1), 0 +0xe6 0x0f 0x10 0x00 0x00 0x07 + +# CHECK: vler %v15, 0, 0 +0xe6 0xf0 0x00 0x00 0x00 0x07 + +# CHECK: vler %v31, 0, 0 +0xe6 0xf0 0x00 0x00 0x08 0x07 + +# CHECK: vler %v18, 1383(%r3,%r4), 11 +0xe6 0x23 0x45 0x67 0xb8 0x07 + +# CHECK: vlerf %v0, 0 +0xe6 0x00 0x00 0x00 0x20 0x07 + +# CHECK: vlerf %v0, 4095 +0xe6 0x00 0x0f 0xff 0x20 0x07 + +# CHECK: vlerf %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x20 0x07 + +# CHECK: vlerf %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x20 0x07 + +# CHECK: vlerf %v15, 0 +0xe6 0xf0 0x00 0x00 0x20 0x07 + +# CHECK: vlerf %v31, 0 +0xe6 0xf0 0x00 0x00 0x28 0x07 + +# CHECK: vlerf %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x28 0x07 + +# CHECK: vlerg %v0, 0 +0xe6 0x00 0x00 0x00 0x30 0x07 + +# CHECK: vlerg %v0, 4095 +0xe6 0x00 0x0f 0xff 0x30 0x07 + +# CHECK: vlerg %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x30 0x07 + +# CHECK: vlerg %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x30 0x07 + +# CHECK: vlerg %v15, 0 +0xe6 0xf0 0x00 0x00 0x30 0x07 + +# CHECK: vlerg %v31, 0 +0xe6 0xf0 0x00 0x00 0x38 0x07 + +# CHECK: vlerg %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x38 0x07 + +# CHECK: vlerh %v0, 0 +0xe6 0x00 0x00 0x00 0x10 0x07 + +# CHECK: vlerh %v0, 4095 +0xe6 0x00 0x0f 0xff 0x10 0x07 + +# CHECK: vlerh %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x10 0x07 + +# CHECK: vlerh %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x10 0x07 + +# CHECK: vlerh %v15, 0 +0xe6 0xf0 0x00 0x00 0x10 0x07 + +# CHECK: vlerh %v31, 0 +0xe6 0xf0 0x00 0x00 0x18 0x07 + +# CHECK: vlerh %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x18 0x07 + +# CHECK: vllebrz %v0, 0, 0 +0xe6 0x00 0x00 0x00 0x00 0x04 + +# CHECK: vllebrz %v0, 0, 15 +0xe6 0x00 0x00 0x00 0xf0 0x04 + +# CHECK: vllebrz %v0, 4095, 0 +0xe6 0x00 0x0f 0xff 0x00 0x04 + +# CHECK: vllebrz %v0, 0(%r15), 0 +0xe6 0x00 0xf0 0x00 0x00 0x04 + +# CHECK: vllebrz %v0, 0(%r15,%r1), 0 +0xe6 0x0f 0x10 0x00 0x00 0x04 + +# CHECK: vllebrz %v15, 0, 0 +0xe6 0xf0 0x00 0x00 0x00 0x04 + +# CHECK: vllebrz %v31, 0, 0 +0xe6 0xf0 0x00 0x00 0x08 0x04 + +# CHECK: vllebrz %v18, 1383(%r3,%r4), 11 +0xe6 0x23 0x45 0x67 0xb8 0x04 + +# CHECK: vllebrze %v0, 0 +0xe6 0x00 0x00 0x00 0x60 0x04 + +# CHECK: vllebrze %v0, 4095 +0xe6 0x00 0x0f 0xff 0x60 0x04 + +# CHECK: vllebrze %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x60 0x04 + +# CHECK: vllebrze %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x60 0x04 + +# CHECK: vllebrze %v15, 0 +0xe6 0xf0 0x00 0x00 0x60 0x04 + +# CHECK: vllebrze %v31, 0 +0xe6 0xf0 0x00 0x00 0x68 0x04 + +# CHECK: vllebrze %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x68 0x04 + +# CHECK: vllebrzf %v0, 0 +0xe6 0x00 0x00 0x00 0x20 0x04 + +# CHECK: vllebrzf %v0, 4095 +0xe6 0x00 0x0f 0xff 0x20 0x04 + +# 
CHECK: vllebrzf %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x20 0x04 + +# CHECK: vllebrzf %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x20 0x04 + +# CHECK: vllebrzf %v15, 0 +0xe6 0xf0 0x00 0x00 0x20 0x04 + +# CHECK: vllebrzf %v31, 0 +0xe6 0xf0 0x00 0x00 0x28 0x04 + +# CHECK: vllebrzf %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x28 0x04 + +# CHECK: vllebrzg %v0, 0 +0xe6 0x00 0x00 0x00 0x30 0x04 + +# CHECK: vllebrzg %v0, 4095 +0xe6 0x00 0x0f 0xff 0x30 0x04 + +# CHECK: vllebrzg %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x30 0x04 + +# CHECK: vllebrzg %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x30 0x04 + +# CHECK: vllebrzg %v15, 0 +0xe6 0xf0 0x00 0x00 0x30 0x04 + +# CHECK: vllebrzg %v31, 0 +0xe6 0xf0 0x00 0x00 0x38 0x04 + +# CHECK: vllebrzg %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x38 0x04 + +# CHECK: vllebrzh %v0, 0 +0xe6 0x00 0x00 0x00 0x10 0x04 + +# CHECK: vllebrzh %v0, 4095 +0xe6 0x00 0x0f 0xff 0x10 0x04 + +# CHECK: vllebrzh %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x10 0x04 + +# CHECK: vllebrzh %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x10 0x04 + +# CHECK: vllebrzh %v15, 0 +0xe6 0xf0 0x00 0x00 0x10 0x04 + +# CHECK: vllebrzh %v31, 0 +0xe6 0xf0 0x00 0x00 0x18 0x04 + +# CHECK: vllebrzh %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x18 0x04 + +# CHECK: vsld %v0, %v0, %v0, 0 +0xe7 0x00 0x00 0x00 0x00 0x86 + +# CHECK: vsld %v0, %v0, %v0, 255 +0xe7 0x00 0x00 0xff 0x00 0x86 + +# CHECK: vsld %v0, %v0, %v31, 0 +0xe7 0x00 0xf0 0x00 0x02 0x86 + +# CHECK: vsld %v0, %v31, %v0, 0 +0xe7 0x0f 0x00 0x00 0x04 0x86 + +# CHECK: vsld %v31, %v0, %v0, 0 +0xe7 0xf0 0x00 0x00 0x08 0x86 + +# CHECK: vsld %v13, %v17, %v21, 121 +0xe7 0xd1 0x50 0x79 0x06 0x86 + +# CHECK: vsrd %v0, %v0, %v0, 0 +0xe7 0x00 0x00 0x00 0x00 0x87 + +# CHECK: vsrd %v0, %v0, %v0, 255 +0xe7 0x00 0x00 0xff 0x00 0x87 + +# CHECK: vsrd %v0, %v0, %v31, 0 +0xe7 0x00 0xf0 0x00 0x02 0x87 + +# CHECK: vsrd %v0, %v31, %v0, 0 +0xe7 0x0f 0x00 0x00 0x04 0x87 + +# CHECK: vsrd %v31, %v0, %v0, 0 +0xe7 0xf0 0x00 0x00 0x08 0x87 + +# CHECK: vsrd %v13, %v17, %v21, 121 +0xe7 0xd1 0x50 0x79 0x06 0x87 + +# CHECK: vstbr %v0, 0, 0 +0xe6 0x00 0x00 0x00 0x00 0x0e + +# CHECK: vstbr %v0, 0, 15 +0xe6 0x00 0x00 0x00 0xf0 0x0e + +# CHECK: vstbr %v0, 4095, 0 +0xe6 0x00 0x0f 0xff 0x00 0x0e + +# CHECK: vstbr %v0, 0(%r15), 0 +0xe6 0x00 0xf0 0x00 0x00 0x0e + +# CHECK: vstbr %v0, 0(%r15,%r1), 0 +0xe6 0x0f 0x10 0x00 0x00 0x0e + +# CHECK: vstbr %v15, 0, 0 +0xe6 0xf0 0x00 0x00 0x00 0x0e + +# CHECK: vstbr %v31, 0, 0 +0xe6 0xf0 0x00 0x00 0x08 0x0e + +# CHECK: vstbr %v18, 1383(%r3,%r4), 11 +0xe6 0x23 0x45 0x67 0xb8 0x0e + +# CHECK: vstbrf %v0, 0 +0xe6 0x00 0x00 0x00 0x20 0x0e + +# CHECK: vstbrf %v0, 4095 +0xe6 0x00 0x0f 0xff 0x20 0x0e + +# CHECK: vstbrf %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x20 0x0e + +# CHECK: vstbrf %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x20 0x0e + +# CHECK: vstbrf %v15, 0 +0xe6 0xf0 0x00 0x00 0x20 0x0e + +# CHECK: vstbrf %v31, 0 +0xe6 0xf0 0x00 0x00 0x28 0x0e + +# CHECK: vstbrf %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x28 0x0e + +# CHECK: vstbrg %v0, 0 +0xe6 0x00 0x00 0x00 0x30 0x0e + +# CHECK: vstbrg %v0, 4095 +0xe6 0x00 0x0f 0xff 0x30 0x0e + +# CHECK: vstbrg %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x30 0x0e + +# CHECK: vstbrg %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x30 0x0e + +# CHECK: vstbrg %v15, 0 +0xe6 0xf0 0x00 0x00 0x30 0x0e + +# CHECK: vstbrg %v31, 0 +0xe6 0xf0 0x00 0x00 0x38 0x0e + +# CHECK: vstbrg %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x38 0x0e + +# CHECK: vstbrh %v0, 0 +0xe6 0x00 0x00 0x00 0x10 0x0e + +# CHECK: vstbrh %v0, 4095 +0xe6 0x00 0x0f 0xff 0x10 0x0e + +# CHECK: vstbrh %v0, 0(%r15) +0xe6 0x00 0xf0 
0x00 0x10 0x0e + +# CHECK: vstbrh %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x10 0x0e + +# CHECK: vstbrh %v15, 0 +0xe6 0xf0 0x00 0x00 0x10 0x0e + +# CHECK: vstbrh %v31, 0 +0xe6 0xf0 0x00 0x00 0x18 0x0e + +# CHECK: vstbrh %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x18 0x0e + +# CHECK: vstbrq %v0, 0 +0xe6 0x00 0x00 0x00 0x40 0x0e + +# CHECK: vstbrq %v0, 4095 +0xe6 0x00 0x0f 0xff 0x40 0x0e + +# CHECK: vstbrq %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x40 0x0e + +# CHECK: vstbrq %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x40 0x0e + +# CHECK: vstbrq %v15, 0 +0xe6 0xf0 0x00 0x00 0x40 0x0e + +# CHECK: vstbrq %v31, 0 +0xe6 0xf0 0x00 0x00 0x48 0x0e + +# CHECK: vstbrq %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x48 0x0e + +# CHECK: vstebrf %v0, 0, 0 +0xe6 0x00 0x00 0x00 0x00 0x0b + +# CHECK: vstebrf %v0, 0, 3 +0xe6 0x00 0x00 0x00 0x30 0x0b + +# CHECK: vstebrf %v0, 4095, 0 +0xe6 0x00 0x0f 0xff 0x00 0x0b + +# CHECK: vstebrf %v0, 0(%r15), 0 +0xe6 0x00 0xf0 0x00 0x00 0x0b + +# CHECK: vstebrf %v0, 0(%r15,%r1), 0 +0xe6 0x0f 0x10 0x00 0x00 0x0b + +# CHECK: vstebrf %v15, 0, 0 +0xe6 0xf0 0x00 0x00 0x00 0x0b + +# CHECK: vstebrf %v31, 0, 0 +0xe6 0xf0 0x00 0x00 0x08 0x0b + +# CHECK: vstebrf %v18, 1383(%r3,%r4), 2 +0xe6 0x23 0x45 0x67 0x28 0x0b + +# CHECK: vstebrg %v0, 0, 0 +0xe6 0x00 0x00 0x00 0x00 0x0a + +# CHECK: vstebrg %v0, 0, 1 +0xe6 0x00 0x00 0x00 0x10 0x0a + +# CHECK: vstebrg %v0, 4095, 0 +0xe6 0x00 0x0f 0xff 0x00 0x0a + +# CHECK: vstebrg %v0, 0(%r15), 0 +0xe6 0x00 0xf0 0x00 0x00 0x0a + +# CHECK: vstebrg %v0, 0(%r15,%r1), 0 +0xe6 0x0f 0x10 0x00 0x00 0x0a + +# CHECK: vstebrg %v15, 0, 0 +0xe6 0xf0 0x00 0x00 0x00 0x0a + +# CHECK: vstebrg %v31, 0, 0 +0xe6 0xf0 0x00 0x00 0x08 0x0a + +# CHECK: vstebrg %v18, 1383(%r3,%r4), 1 +0xe6 0x23 0x45 0x67 0x18 0x0a + +# CHECK: vstebrh %v0, 0, 0 +0xe6 0x00 0x00 0x00 0x00 0x09 + +# CHECK: vstebrh %v0, 0, 7 +0xe6 0x00 0x00 0x00 0x70 0x09 + +# CHECK: vstebrh %v0, 4095, 0 +0xe6 0x00 0x0f 0xff 0x00 0x09 + +# CHECK: vstebrh %v0, 0(%r15), 0 +0xe6 0x00 0xf0 0x00 0x00 0x09 + +# CHECK: vstebrh %v0, 0(%r15,%r1), 0 +0xe6 0x0f 0x10 0x00 0x00 0x09 + +# CHECK: vstebrh %v15, 0, 0 +0xe6 0xf0 0x00 0x00 0x00 0x09 + +# CHECK: vstebrh %v31, 0, 0 +0xe6 0xf0 0x00 0x00 0x08 0x09 + +# CHECK: vstebrh %v18, 1383(%r3,%r4), 4 +0xe6 0x23 0x45 0x67 0x48 0x09 + +# CHECK: vster %v0, 0, 0 +0xe6 0x00 0x00 0x00 0x00 0x0f + +# CHECK: vster %v0, 0, 15 +0xe6 0x00 0x00 0x00 0xf0 0x0f + +# CHECK: vster %v0, 4095, 0 +0xe6 0x00 0x0f 0xff 0x00 0x0f + +# CHECK: vster %v0, 0(%r15), 0 +0xe6 0x00 0xf0 0x00 0x00 0x0f + +# CHECK: vster %v0, 0(%r15,%r1), 0 +0xe6 0x0f 0x10 0x00 0x00 0x0f + +# CHECK: vster %v15, 0, 0 +0xe6 0xf0 0x00 0x00 0x00 0x0f + +# CHECK: vster %v31, 0, 0 +0xe6 0xf0 0x00 0x00 0x08 0x0f + +# CHECK: vster %v18, 1383(%r3,%r4), 11 +0xe6 0x23 0x45 0x67 0xb8 0x0f + +# CHECK: vsterf %v0, 0 +0xe6 0x00 0x00 0x00 0x20 0x0f + +# CHECK: vsterf %v0, 4095 +0xe6 0x00 0x0f 0xff 0x20 0x0f + +# CHECK: vsterf %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x20 0x0f + +# CHECK: vsterf %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x20 0x0f + +# CHECK: vsterf %v15, 0 +0xe6 0xf0 0x00 0x00 0x20 0x0f + +# CHECK: vsterf %v31, 0 +0xe6 0xf0 0x00 0x00 0x28 0x0f + +# CHECK: vsterf %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x28 0x0f + +# CHECK: vsterg %v0, 0 +0xe6 0x00 0x00 0x00 0x30 0x0f + +# CHECK: vsterg %v0, 4095 +0xe6 0x00 0x0f 0xff 0x30 0x0f + +# CHECK: vsterg %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x30 0x0f + +# CHECK: vsterg %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x30 0x0f + +# CHECK: vsterg %v15, 0 +0xe6 0xf0 0x00 0x00 0x30 0x0f + +# CHECK: vsterg %v31, 0 +0xe6 0xf0 
0x00 0x00 0x38 0x0f + +# CHECK: vsterg %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x38 0x0f + +# CHECK: vsterh %v0, 0 +0xe6 0x00 0x00 0x00 0x10 0x0f + +# CHECK: vsterh %v0, 4095 +0xe6 0x00 0x0f 0xff 0x10 0x0f + +# CHECK: vsterh %v0, 0(%r15) +0xe6 0x00 0xf0 0x00 0x10 0x0f + +# CHECK: vsterh %v0, 0(%r15,%r1) +0xe6 0x0f 0x10 0x00 0x10 0x0f + +# CHECK: vsterh %v15, 0 +0xe6 0xf0 0x00 0x00 0x10 0x0f + +# CHECK: vsterh %v31, 0 +0xe6 0xf0 0x00 0x00 0x18 0x0f + +# CHECK: vsterh %v18, 1383(%r3,%r4) +0xe6 0x23 0x45 0x67 0x18 0x0f + +# CHECK: vstrs %v0, %v0, %v0, %v0, 11, 0 +0xe7 0x00 0x0b 0x00 0x00 0x8b + +# CHECK: vstrs %v0, %v0, %v0, %v0, 11, 12 +0xe7 0x00 0x0b 0xc0 0x00 0x8b + +# CHECK: vstrs %v18, %v3, %v20, %v5, 11, 0 +0xe7 0x23 0x4b 0x00 0x5a 0x8b + +# CHECK: vstrs %v31, %v31, %v31, %v31, 11, 4 +0xe7 0xff 0xfb 0x40 0xff 0x8b + +# CHECK: vstrsb %v0, %v0, %v0, %v0, 0 +0xe7 0x00 0x00 0x00 0x00 0x8b + +# CHECK: vstrsb %v0, %v0, %v0, %v0, 0 +0xe7 0x00 0x00 0x00 0x00 0x8b + +# CHECK: vstrsb %v0, %v0, %v0, %v0, 12 +0xe7 0x00 0x00 0xc0 0x00 0x8b + +# CHECK: vstrsb %v0, %v0, %v0, %v15, 0 +0xe7 0x00 0x00 0x00 0xf0 0x8b + +# CHECK: vstrsb %v0, %v0, %v0, %v31, 0 +0xe7 0x00 0x00 0x00 0xf1 0x8b + +# CHECK: vstrsb %v0, %v0, %v15, %v0, 0 +0xe7 0x00 0xf0 0x00 0x00 0x8b + +# CHECK: vstrsb %v0, %v0, %v31, %v0, 0 +0xe7 0x00 0xf0 0x00 0x02 0x8b + +# CHECK: vstrsb %v0, %v15, %v0, %v0, 0 +0xe7 0x0f 0x00 0x00 0x00 0x8b + +# CHECK: vstrsb %v0, %v31, %v0, %v0, 0 +0xe7 0x0f 0x00 0x00 0x04 0x8b + +# CHECK: vstrsb %v15, %v0, %v0, %v0, 0 +0xe7 0xf0 0x00 0x00 0x00 0x8b + +# CHECK: vstrsb %v31, %v0, %v0, %v0, 0 +0xe7 0xf0 0x00 0x00 0x08 0x8b + +# CHECK: vstrsb %v18, %v3, %v20, %v5, 4 +0xe7 0x23 0x40 0x40 0x5a 0x8b + +# CHECK: vstrsb %v18, %v3, %v20, %v5, 12 +0xe7 0x23 0x40 0xc0 0x5a 0x8b + +# CHECK: vstrszb %v18, %v3, %v20, %v5 +0xe7 0x23 0x40 0x20 0x5a 0x8b + +# CHECK: vstrsf %v0, %v0, %v0, %v0, 0 +0xe7 0x00 0x02 0x00 0x00 0x8b + +# CHECK: vstrsf %v0, %v0, %v0, %v0, 0 +0xe7 0x00 0x02 0x00 0x00 0x8b + +# CHECK: vstrsf %v0, %v0, %v0, %v0, 12 +0xe7 0x00 0x02 0xc0 0x00 0x8b + +# CHECK: vstrsf %v0, %v0, %v0, %v15, 0 +0xe7 0x00 0x02 0x00 0xf0 0x8b + +# CHECK: vstrsf %v0, %v0, %v0, %v31, 0 +0xe7 0x00 0x02 0x00 0xf1 0x8b + +# CHECK: vstrsf %v0, %v0, %v15, %v0, 0 +0xe7 0x00 0xf2 0x00 0x00 0x8b + +# CHECK: vstrsf %v0, %v0, %v31, %v0, 0 +0xe7 0x00 0xf2 0x00 0x02 0x8b + +# CHECK: vstrsf %v0, %v15, %v0, %v0, 0 +0xe7 0x0f 0x02 0x00 0x00 0x8b + +# CHECK: vstrsf %v0, %v31, %v0, %v0, 0 +0xe7 0x0f 0x02 0x00 0x04 0x8b + +# CHECK: vstrsf %v15, %v0, %v0, %v0, 0 +0xe7 0xf0 0x02 0x00 0x00 0x8b + +# CHECK: vstrsf %v31, %v0, %v0, %v0, 0 +0xe7 0xf0 0x02 0x00 0x08 0x8b + +# CHECK: vstrsf %v18, %v3, %v20, %v5, 4 +0xe7 0x23 0x42 0x40 0x5a 0x8b + +# CHECK: vstrsf %v18, %v3, %v20, %v5, 12 +0xe7 0x23 0x42 0xc0 0x5a 0x8b + +# CHECK: vstrszf %v18, %v3, %v20, %v5 +0xe7 0x23 0x42 0x20 0x5a 0x8b + +# CHECK: vstrsh %v0, %v0, %v0, %v0, 0 +0xe7 0x00 0x01 0x00 0x00 0x8b + +# CHECK: vstrsh %v0, %v0, %v0, %v0, 0 +0xe7 0x00 0x01 0x00 0x00 0x8b + +# CHECK: vstrsh %v0, %v0, %v0, %v0, 12 +0xe7 0x00 0x01 0xc0 0x00 0x8b + +# CHECK: vstrsh %v0, %v0, %v0, %v15, 0 +0xe7 0x00 0x01 0x00 0xf0 0x8b + +# CHECK: vstrsh %v0, %v0, %v0, %v31, 0 +0xe7 0x00 0x01 0x00 0xf1 0x8b + +# CHECK: vstrsh %v0, %v0, %v15, %v0, 0 +0xe7 0x00 0xf1 0x00 0x00 0x8b + +# CHECK: vstrsh %v0, %v0, %v31, %v0, 0 +0xe7 0x00 0xf1 0x00 0x02 0x8b + +# CHECK: vstrsh %v0, %v15, %v0, %v0, 0 +0xe7 0x0f 0x01 0x00 0x00 0x8b + +# CHECK: vstrsh %v0, %v31, %v0, %v0, 0 +0xe7 0x0f 0x01 0x00 0x04 0x8b + +# CHECK: vstrsh %v15, 
%v0, %v0, %v0, 0 +0xe7 0xf0 0x01 0x00 0x00 0x8b + +# CHECK: vstrsh %v31, %v0, %v0, %v0, 0 +0xe7 0xf0 0x01 0x00 0x08 0x8b + +# CHECK: vstrsh %v18, %v3, %v20, %v5, 4 +0xe7 0x23 0x41 0x40 0x5a 0x8b + +# CHECK: vstrsh %v18, %v3, %v20, %v5, 12 +0xe7 0x23 0x41 0xc0 0x5a 0x8b + +# CHECK: vstrszh %v18, %v3, %v20, %v5 +0xe7 0x23 0x41 0x20 0x5a 0x8b + +# CHECK: wcefb %f0, %f0, 0, 0 +0xe7 0x00 0x00 0x08 0x20 0xc3 + +# CHECK: wcefb %f0, %f0, 0, 0 +0xe7 0x00 0x00 0x08 0x20 0xc3 + +# CHECK: wcefb %f0, %f0, 0, 15 +0xe7 0x00 0x00 0xf8 0x20 0xc3 + +# CHECK: wcefb %f0, %f0, 4, 0 +0xe7 0x00 0x00 0x0c 0x20 0xc3 + +# CHECK: wcefb %f0, %v31, 0, 0 +0xe7 0x0f 0x00 0x08 0x24 0xc3 + +# CHECK: wcefb %v31, %f0, 0, 0 +0xe7 0xf0 0x00 0x08 0x28 0xc3 + +# CHECK: wcefb %f14, %v17, 4, 10 +0xe7 0xe1 0x00 0xac 0x24 0xc3 + +# CHECK: wcelfb %f0, %f0, 0, 0 +0xe7 0x00 0x00 0x08 0x20 0xc1 + +# CHECK: wcelfb %f0, %f0, 0, 0 +0xe7 0x00 0x00 0x08 0x20 0xc1 + +# CHECK: wcelfb %f0, %f0, 0, 15 +0xe7 0x00 0x00 0xf8 0x20 0xc1 + +# CHECK: wcelfb %f0, %f0, 4, 0 +0xe7 0x00 0x00 0x0c 0x20 0xc1 + +# CHECK: wcelfb %f0, %v31, 0, 0 +0xe7 0x0f 0x00 0x08 0x24 0xc1 + +# CHECK: wcelfb %v31, %f0, 0, 0 +0xe7 0xf0 0x00 0x08 0x28 0xc1 + +# CHECK: wcelfb %f14, %v17, 4, 10 +0xe7 0xe1 0x00 0xac 0x24 0xc1 + +# CHECK: wcfeb %f0, %f0, 0, 0 +0xe7 0x00 0x00 0x08 0x20 0xc2 + +# CHECK: wcfeb %f0, %f0, 0, 0 +0xe7 0x00 0x00 0x08 0x20 0xc2 + +# CHECK: wcfeb %f0, %f0, 0, 15 +0xe7 0x00 0x00 0xf8 0x20 0xc2 + +# CHECK: wcfeb %f0, %f0, 4, 0 +0xe7 0x00 0x00 0x0c 0x20 0xc2 + +# CHECK: wcfeb %f0, %v31, 0, 0 +0xe7 0x0f 0x00 0x08 0x24 0xc2 + +# CHECK: wcfeb %v31, %f0, 0, 0 +0xe7 0xf0 0x00 0x08 0x28 0xc2 + +# CHECK: wcfeb %f14, %v17, 4, 10 +0xe7 0xe1 0x00 0xac 0x24 0xc2 + +# CHECK: wclfeb %f0, %f0, 0, 0 +0xe7 0x00 0x00 0x08 0x20 0xc0 + +# CHECK: wclfeb %f0, %f0, 0, 0 +0xe7 0x00 0x00 0x08 0x20 0xc0 + +# CHECK: wclfeb %f0, %f0, 0, 15 +0xe7 0x00 0x00 0xf8 0x20 0xc0 + +# CHECK: wclfeb %f0, %f0, 4, 0 +0xe7 0x00 0x00 0x0c 0x20 0xc0 + +# CHECK: wclfeb %f0, %v31, 0, 0 +0xe7 0x0f 0x00 0x08 0x24 0xc0 + +# CHECK: wclfeb %v31, %f0, 0, 0 +0xe7 0xf0 0x00 0x08 0x28 0xc0 + +# CHECK: wclfeb %f14, %v17, 4, 10 +0xe7 0xe1 0x00 0xac 0x24 0xc0 diff --git a/llvm/test/MC/SystemZ/insn-bad-arch13.s b/llvm/test/MC/SystemZ/insn-bad-arch13.s new file mode 100644 index 00000000000..ad84e55b69f --- /dev/null +++ b/llvm/test/MC/SystemZ/insn-bad-arch13.s @@ -0,0 +1,881 @@ +# For arch13 only. 
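+#
+# Each block below lists the diagnostics that llvm-mc is expected to emit
+# (the "#CHECK: error:" lines) followed by the ill-formed instructions that
+# trigger them; the RUN lines capture stderr and FileCheck matches the two
+# against each other.  For example, "vsld %v0, %v0, %v0, 256" draws
+# "invalid operand" because the immediate is encoded in 8 bits and must
+# lie in the range 0-255.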
+# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=arch13 < %s 2> %t +# RUN: FileCheck < %t %s + +#CHECK: error: invalid register pair +#CHECK: dfltcc %r1, %r2, %r4 +#CHECK: error: invalid register pair +#CHECK: dfltcc %r2, %r1, %r4 + + dfltcc %r1, %r2, %r4 + dfltcc %r2, %r1, %r4 + +#CHECK: error: invalid register pair +#CHECK: kdsa %r0, %r1 + + kdsa %r0, %r1 + +#CHECK: error: invalid operand +#CHECK: ldrv %f0, -1 +#CHECK: error: invalid operand +#CHECK: ldrv %f0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: ldrv %f0, 0(%v1,%r2) + + ldrv %f0, -1 + ldrv %f0, 4096 + ldrv %f0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: lerv %f0, -1 +#CHECK: error: invalid operand +#CHECK: lerv %f0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: lerv %f0, 0(%v1,%r2) + + lerv %f0, -1 + lerv %f0, 4096 + lerv %f0, 0(%v1,%r2) + +#CHECK: error: invalid use of indexed addressing +#CHECK: mvcrl 160(%r1,%r15),160(%r15) +#CHECK: error: invalid operand +#CHECK: mvcrl -1(%r1),160(%r15) +#CHECK: error: invalid operand +#CHECK: mvcrl 4096(%r1),160(%r15) +#CHECK: error: invalid operand +#CHECK: mvcrl 0(%r1),-1(%r15) +#CHECK: error: invalid operand +#CHECK: mvcrl 0(%r1),4096(%r15) + + mvcrl 160(%r1,%r15),160(%r15) + mvcrl -1(%r1),160(%r15) + mvcrl 4096(%r1),160(%r15) + mvcrl 0(%r1),-1(%r15) + mvcrl 0(%r1),4096(%r15) + +#CHECK: error: invalid operand +#CHECK: popcnt %r2, %r4, -1 +#CHECK: error: invalid operand +#CHECK: popcnt %r2, %r4, 16 + + popcnt %r2, %r4, -1 + popcnt %r2, %r4, 16 + +#CHECK: error: invalid operand +#CHECK: selgr %r0, %r0, %r0, -1 +#CHECK: error: invalid operand +#CHECK: selgr %r0, %r0, %r0, 16 + + selgr %r0, %r0, %r0, -1 + selgr %r0, %r0, %r0, 16 + +#CHECK: error: invalid operand +#CHECK: selfhr %r0, %r0, %r0, -1 +#CHECK: error: invalid operand +#CHECK: selfhr %r0, %r0, %r0, 16 + + selfhr %r0, %r0, %r0, -1 + selfhr %r0, %r0, %r0, 16 + +#CHECK: error: invalid operand +#CHECK: selr %r0, %r0, %r0, -1 +#CHECK: error: invalid operand +#CHECK: selr %r0, %r0, %r0, 16 + + selr %r0, %r0, %r0, -1 + selr %r0, %r0, %r0, 16 + +#CHECK: error: invalid register pair +#CHECK: sortl %r1, %r2 +#CHECK: error: invalid register pair +#CHECK: sortl %r2, %r1 + + sortl %r1, %r2 + sortl %r2, %r1 + +#CHECK: error: invalid operand +#CHECK: stdrv %f0, -1 +#CHECK: error: invalid operand +#CHECK: stdrv %f0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: stdrv %f0, 0(%v1,%r2) + + stdrv %f0, -1 + stdrv %f0, 4096 + stdrv %f0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: sterv %f0, -1 +#CHECK: error: invalid operand +#CHECK: sterv %f0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: sterv %f0, 0(%v1,%r2) + + sterv %f0, -1 + sterv %f0, 4096 + sterv %f0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vcefb %v0, %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vcefb %v0, %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vcefb %v0, %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vcefb %v0, %v0, 16, 0 + + vcefb %v0, %v0, 0, -1 + vcefb %v0, %v0, 0, 16 + vcefb %v0, %v0, -1, 0 + vcefb %v0, %v0, 16, 0 + +#CHECK: error: invalid operand +#CHECK: vcelfb %v0, %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vcelfb %v0, %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vcelfb %v0, %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vcelfb %v0, %v0, 16, 0 + + vcelfb %v0, %v0, 0, -1 + vcelfb %v0, %v0, 0, 16 + vcelfb %v0, %v0, -1, 0 + vcelfb %v0, %v0, 16, 0 + +#CHECK: error: invalid operand +#CHECK: vcfeb %v0, %v0, 0, -1 +#CHECK: error: invalid operand 
+#CHECK: vcfeb %v0, %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vcfeb %v0, %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vcfeb %v0, %v0, 16, 0 + + vcfeb %v0, %v0, 0, -1 + vcfeb %v0, %v0, 0, 16 + vcfeb %v0, %v0, -1, 0 + vcfeb %v0, %v0, 16, 0 + +#CHECK: error: invalid operand +#CHECK: vcfpl %v0, %v0, 0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vcfpl %v0, %v0, 0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vcfpl %v0, %v0, 0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vcfpl %v0, %v0, 0, 16, 0 +#CHECK: error: invalid operand +#CHECK: vcfpl %v0, %v0, -1, 0, 0 +#CHECK: error: invalid operand +#CHECK: vcfpl %v0, %v0, 16, 0, 0 + + vcfpl %v0, %v0, 0, 0, -1 + vcfpl %v0, %v0, 0, 0, 16 + vcfpl %v0, %v0, 0, -1, 0 + vcfpl %v0, %v0, 0, 16, 0 + vcfpl %v0, %v0, -1, 0, 0 + vcfpl %v0, %v0, 16, 0, 0 + +#CHECK: error: invalid operand +#CHECK: vcfps %v0, %v0, 0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vcfps %v0, %v0, 0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vcfps %v0, %v0, 0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vcfps %v0, %v0, 0, 16, 0 +#CHECK: error: invalid operand +#CHECK: vcfps %v0, %v0, -1, 0, 0 +#CHECK: error: invalid operand +#CHECK: vcfps %v0, %v0, 16, 0, 0 + + vcfps %v0, %v0, 0, 0, -1 + vcfps %v0, %v0, 0, 0, 16 + vcfps %v0, %v0, 0, -1, 0 + vcfps %v0, %v0, 0, 16, 0 + vcfps %v0, %v0, -1, 0, 0 + vcfps %v0, %v0, 16, 0, 0 + +#CHECK: error: invalid operand +#CHECK: vclfeb %v0, %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vclfeb %v0, %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vclfeb %v0, %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vclfeb %v0, %v0, 16, 0 + + vclfeb %v0, %v0, 0, -1 + vclfeb %v0, %v0, 0, 16 + vclfeb %v0, %v0, -1, 0 + vclfeb %v0, %v0, 16, 0 + +#CHECK: error: invalid operand +#CHECK: vclfp %v0, %v0, 0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vclfp %v0, %v0, 0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vclfp %v0, %v0, 0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vclfp %v0, %v0, 0, 16, 0 +#CHECK: error: invalid operand +#CHECK: vclfp %v0, %v0, -1, 0, 0 +#CHECK: error: invalid operand +#CHECK: vclfp %v0, %v0, 16, 0, 0 + + vclfp %v0, %v0, 0, 0, -1 + vclfp %v0, %v0, 0, 0, 16 + vclfp %v0, %v0, 0, -1, 0 + vclfp %v0, %v0, 0, 16, 0 + vclfp %v0, %v0, -1, 0, 0 + vclfp %v0, %v0, 16, 0, 0 + +#CHECK: error: invalid operand +#CHECK: vcsfp %v0, %v0, 0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vcsfp %v0, %v0, 0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vcsfp %v0, %v0, 0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vcsfp %v0, %v0, 0, 16, 0 +#CHECK: error: invalid operand +#CHECK: vcsfp %v0, %v0, -1, 0, 0 +#CHECK: error: invalid operand +#CHECK: vcsfp %v0, %v0, 16, 0, 0 + + vcsfp %v0, %v0, 0, 0, -1 + vcsfp %v0, %v0, 0, 0, 16 + vcsfp %v0, %v0, 0, -1, 0 + vcsfp %v0, %v0, 0, 16, 0 + vcsfp %v0, %v0, -1, 0, 0 + vcsfp %v0, %v0, 16, 0, 0 + +#CHECK: error: invalid operand +#CHECK: vcvb %r0, %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vcvb %r0, %v0, 0, 16 + + vcvb %r0, %v0, 0, -1 + vcvb %r0, %v0, 0, 16 + +#CHECK: error: invalid operand +#CHECK: vcvbg %r0, %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vcvbg %r0, %v0, 0, 16 + + vcvbg %r0, %v0, 0, -1 + vcvbg %r0, %v0, 0, 16 + +#CHECK: error: invalid operand +#CHECK: vlbr %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vlbr %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vlbr %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vlbr %v0, 4096, 0 +#CHECK: error: invalid use of vector addressing +#CHECK: vlbr %v0, 0(%v1,%r2), 0 + + vlbr %v0, 0, -1 + 
vlbr %v0, 0, 16 + vlbr %v0, -1, 0 + vlbr %v0, 4096, 0 + vlbr %v0, 0(%v1,%r2), 0 + +#CHECK: error: invalid operand +#CHECK: vlbrf %v0, -1 +#CHECK: error: invalid operand +#CHECK: vlbrf %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vlbrf %v0, 0(%v1,%r2) + + vlbrf %v0, -1 + vlbrf %v0, 4096 + vlbrf %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vlbrg %v0, -1 +#CHECK: error: invalid operand +#CHECK: vlbrg %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vlbrg %v0, 0(%v1,%r2) + + vlbrg %v0, -1 + vlbrg %v0, 4096 + vlbrg %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vlbrh %v0, -1 +#CHECK: error: invalid operand +#CHECK: vlbrh %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vlbrh %v0, 0(%v1,%r2) + + vlbrh %v0, -1 + vlbrh %v0, 4096 + vlbrh %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vlbrq %v0, -1 +#CHECK: error: invalid operand +#CHECK: vlbrq %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vlbrq %v0, 0(%v1,%r2) + + vlbrq %v0, -1 + vlbrq %v0, 4096 + vlbrq %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vlbrrep %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vlbrrep %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vlbrrep %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vlbrrep %v0, 4096, 0 +#CHECK: error: invalid use of vector addressing +#CHECK: vlbrrep %v0, 0(%v1,%r2), 0 + + vlbrrep %v0, 0, -1 + vlbrrep %v0, 0, 16 + vlbrrep %v0, -1, 0 + vlbrrep %v0, 4096, 0 + vlbrrep %v0, 0(%v1,%r2), 0 + +#CHECK: error: invalid operand +#CHECK: vlbrrepf %v0, -1 +#CHECK: error: invalid operand +#CHECK: vlbrrepf %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vlbrrepf %v0, 0(%v1,%r2) + + vlbrrepf %v0, -1 + vlbrrepf %v0, 4096 + vlbrrepf %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vlbrrepg %v0, -1 +#CHECK: error: invalid operand +#CHECK: vlbrrepg %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vlbrrepg %v0, 0(%v1,%r2) + + vlbrrepg %v0, -1 + vlbrrepg %v0, 4096 + vlbrrepg %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vlbrreph %v0, -1 +#CHECK: error: invalid operand +#CHECK: vlbrreph %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vlbrreph %v0, 0(%v1,%r2) + + vlbrreph %v0, -1 + vlbrreph %v0, 4096 + vlbrreph %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vlebrf %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vlebrf %v0, 0, 4 +#CHECK: error: invalid operand +#CHECK: vlebrf %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vlebrf %v0, 4096, 0 +#CHECK: error: invalid use of vector addressing +#CHECK: vlebrf %v0, 0(%v1,%r2), 0 + + vlebrf %v0, 0, -1 + vlebrf %v0, 0, 4 + vlebrf %v0, -1, 0 + vlebrf %v0, 4096, 0 + vlebrf %v0, 0(%v1,%r2), 0 + +#CHECK: error: invalid operand +#CHECK: vlebrg %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vlebrg %v0, 0, 2 +#CHECK: error: invalid operand +#CHECK: vlebrg %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vlebrg %v0, 4096, 0 +#CHECK: error: invalid use of vector addressing +#CHECK: vlebrg %v0, 0(%v1,%r2), 0 + + vlebrg %v0, 0, -1 + vlebrg %v0, 0, 2 + vlebrg %v0, -1, 0 + vlebrg %v0, 4096, 0 + vlebrg %v0, 0(%v1,%r2), 0 + +#CHECK: error: invalid operand +#CHECK: vlebrh %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vlebrh %v0, 0, 8 +#CHECK: error: invalid operand +#CHECK: vlebrh %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vlebrh %v0, 4096, 0 +#CHECK: error: invalid use of vector addressing +#CHECK: vlebrh %v0, 0(%v1,%r2), 0 + + 
vlebrh %v0, 0, -1 + vlebrh %v0, 0, 8 + vlebrh %v0, -1, 0 + vlebrh %v0, 4096, 0 + vlebrh %v0, 0(%v1,%r2), 0 + +#CHECK: error: invalid operand +#CHECK: vler %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vler %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vler %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vler %v0, 4096, 0 +#CHECK: error: invalid use of vector addressing +#CHECK: vler %v0, 0(%v1,%r2), 0 + + vler %v0, 0, -1 + vler %v0, 0, 16 + vler %v0, -1, 0 + vler %v0, 4096, 0 + vler %v0, 0(%v1,%r2), 0 + +#CHECK: error: invalid operand +#CHECK: vlerf %v0, -1 +#CHECK: error: invalid operand +#CHECK: vlerf %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vlerf %v0, 0(%v1,%r2) + + vlerf %v0, -1 + vlerf %v0, 4096 + vlerf %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vlerg %v0, -1 +#CHECK: error: invalid operand +#CHECK: vlerg %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vlerg %v0, 0(%v1,%r2) + + vlerg %v0, -1 + vlerg %v0, 4096 + vlerg %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vlerh %v0, -1 +#CHECK: error: invalid operand +#CHECK: vlerh %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vlerh %v0, 0(%v1,%r2) + + vlerh %v0, -1 + vlerh %v0, 4096 + vlerh %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vllebrz %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vllebrz %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vllebrz %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vllebrz %v0, 4096, 0 +#CHECK: error: invalid use of vector addressing +#CHECK: vllebrz %v0, 0(%v1,%r2), 0 + + vllebrz %v0, 0, -1 + vllebrz %v0, 0, 16 + vllebrz %v0, -1, 0 + vllebrz %v0, 4096, 0 + vllebrz %v0, 0(%v1,%r2), 0 + +#CHECK: error: invalid operand +#CHECK: vllebrze %v0, -1 +#CHECK: error: invalid operand +#CHECK: vllebrze %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vllebrze %v0, 0(%v1,%r2) + + vllebrze %v0, -1 + vllebrze %v0, 4096 + vllebrze %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vllebrzf %v0, -1 +#CHECK: error: invalid operand +#CHECK: vllebrzf %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vllebrzf %v0, 0(%v1,%r2) + + vllebrzf %v0, -1 + vllebrzf %v0, 4096 + vllebrzf %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vllebrzg %v0, -1 +#CHECK: error: invalid operand +#CHECK: vllebrzg %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vllebrzg %v0, 0(%v1,%r2) + + vllebrzg %v0, -1 + vllebrzg %v0, 4096 + vllebrzg %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vllebrzh %v0, -1 +#CHECK: error: invalid operand +#CHECK: vllebrzh %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vllebrzh %v0, 0(%v1,%r2) + + vllebrzh %v0, -1 + vllebrzh %v0, 4096 + vllebrzh %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vsld %v0, %v0, %v0, -1 +#CHECK: error: invalid operand +#CHECK: vsld %v0, %v0, %v0, 256 + + vsld %v0, %v0, %v0, -1 + vsld %v0, %v0, %v0, 256 + +#CHECK: error: invalid operand +#CHECK: vsrd %v0, %v0, %v0, -1 +#CHECK: error: invalid operand +#CHECK: vsrd %v0, %v0, %v0, 256 + + vsrd %v0, %v0, %v0, -1 + vsrd %v0, %v0, %v0, 256 + +#CHECK: error: invalid operand +#CHECK: vstbr %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vstbr %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vstbr %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vstbr %v0, 4096, 0 +#CHECK: error: invalid use of vector addressing +#CHECK: vstbr %v0, 0(%v1,%r2), 0 + + vstbr %v0, 0, -1 + vstbr %v0, 0, 16 + vstbr 
%v0, -1, 0 + vstbr %v0, 4096, 0 + vstbr %v0, 0(%v1,%r2), 0 + +#CHECK: error: invalid operand +#CHECK: vstbrf %v0, -1 +#CHECK: error: invalid operand +#CHECK: vstbrf %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vstbrf %v0, 0(%v1,%r2) + + vstbrf %v0, -1 + vstbrf %v0, 4096 + vstbrf %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vstbrg %v0, -1 +#CHECK: error: invalid operand +#CHECK: vstbrg %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vstbrg %v0, 0(%v1,%r2) + + vstbrg %v0, -1 + vstbrg %v0, 4096 + vstbrg %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vstbrh %v0, -1 +#CHECK: error: invalid operand +#CHECK: vstbrh %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vstbrh %v0, 0(%v1,%r2) + + vstbrh %v0, -1 + vstbrh %v0, 4096 + vstbrh %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vstbrq %v0, -1 +#CHECK: error: invalid operand +#CHECK: vstbrq %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vstbrq %v0, 0(%v1,%r2) + + vstbrq %v0, -1 + vstbrq %v0, 4096 + vstbrq %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vstebrf %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vstebrf %v0, 0, 4 +#CHECK: error: invalid operand +#CHECK: vstebrf %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vstebrf %v0, 4096, 0 +#CHECK: error: invalid use of vector addressing +#CHECK: vstebrf %v0, 0(%v1,%r2), 0 + + vstebrf %v0, 0, -1 + vstebrf %v0, 0, 4 + vstebrf %v0, -1, 0 + vstebrf %v0, 4096, 0 + vstebrf %v0, 0(%v1,%r2), 0 + +#CHECK: error: invalid operand +#CHECK: vstebrg %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vstebrg %v0, 0, 2 +#CHECK: error: invalid operand +#CHECK: vstebrg %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vstebrg %v0, 4096, 0 +#CHECK: error: invalid use of vector addressing +#CHECK: vstebrg %v0, 0(%v1,%r2), 0 + + vstebrg %v0, 0, -1 + vstebrg %v0, 0, 2 + vstebrg %v0, -1, 0 + vstebrg %v0, 4096, 0 + vstebrg %v0, 0(%v1,%r2), 0 + +#CHECK: error: invalid operand +#CHECK: vstebrh %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vstebrh %v0, 0, 8 +#CHECK: error: invalid operand +#CHECK: vstebrh %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vstebrh %v0, 4096, 0 +#CHECK: error: invalid use of vector addressing +#CHECK: vstebrh %v0, 0(%v1,%r2), 0 + + vstebrh %v0, 0, -1 + vstebrh %v0, 0, 8 + vstebrh %v0, -1, 0 + vstebrh %v0, 4096, 0 + vstebrh %v0, 0(%v1,%r2), 0 + +#CHECK: error: invalid operand +#CHECK: vster %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vster %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vster %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vster %v0, 4096, 0 +#CHECK: error: invalid use of vector addressing +#CHECK: vster %v0, 0(%v1,%r2), 0 + + vster %v0, 0, -1 + vster %v0, 0, 16 + vster %v0, -1, 0 + vster %v0, 4096, 0 + vster %v0, 0(%v1,%r2), 0 + +#CHECK: error: invalid operand +#CHECK: vsterf %v0, -1 +#CHECK: error: invalid operand +#CHECK: vsterf %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vsterf %v0, 0(%v1,%r2) + + vsterf %v0, -1 + vsterf %v0, 4096 + vsterf %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vsterg %v0, -1 +#CHECK: error: invalid operand +#CHECK: vsterg %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: vsterg %v0, 0(%v1,%r2) + + vsterg %v0, -1 + vsterg %v0, 4096 + vsterg %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vsterh %v0, -1 +#CHECK: error: invalid operand +#CHECK: vsterh %v0, 4096 +#CHECK: error: invalid use of vector addressing +#CHECK: 
vsterh %v0, 0(%v1,%r2) + + vsterh %v0, -1 + vsterh %v0, 4096 + vsterh %v0, 0(%v1,%r2) + +#CHECK: error: invalid operand +#CHECK: vstrs %v0, %v0, %v0, %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: vstrs %v0, %v0, %v0, %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: vstrs %v0, %v0, %v0, %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: vstrs %v0, %v0, %v0, %v0, 16, 0 +#CHECK: error: too few operands +#CHECK: vstrs %v0, %v0, %v0, %v0 +#CHECK: error: invalid operand +#CHECK: vstrs %v0, %v0, %v0, %v0, 0, 0, 0 + + vstrs %v0, %v0, %v0, %v0, 0, -1 + vstrs %v0, %v0, %v0, %v0, 0, 16 + vstrs %v0, %v0, %v0, %v0, -1, 0 + vstrs %v0, %v0, %v0, %v0, 16, 0 + vstrs %v0, %v0, %v0, %v0 + vstrs %v0, %v0, %v0, %v0, 0, 0, 0 + +#CHECK: error: invalid operand +#CHECK: vstrsb %v0, %v0, %v0, %v0, -1 +#CHECK: error: invalid operand +#CHECK: vstrsb %v0, %v0, %v0, %v0, 16 +#CHECK: error: too few operands +#CHECK: vstrsb %v0, %v0, %v0 +#CHECK: error: invalid operand +#CHECK: vstrsb %v0, %v0, %v0, %v0, 0, 0 + + vstrsb %v0, %v0, %v0, %v0, -1 + vstrsb %v0, %v0, %v0, %v0, 16 + vstrsb %v0, %v0, %v0 + vstrsb %v0, %v0, %v0, %v0, 0, 0 + +#CHECK: error: invalid operand +#CHECK: vstrsf %v0, %v0, %v0, %v0, -1 +#CHECK: error: invalid operand +#CHECK: vstrsf %v0, %v0, %v0, %v0, 16 +#CHECK: error: too few operands +#CHECK: vstrsf %v0, %v0, %v0 +#CHECK: error: invalid operand +#CHECK: vstrsf %v0, %v0, %v0, %v0, 0, 0 + + vstrsf %v0, %v0, %v0, %v0, -1 + vstrsf %v0, %v0, %v0, %v0, 16 + vstrsf %v0, %v0, %v0 + vstrsf %v0, %v0, %v0, %v0, 0, 0 + +#CHECK: error: invalid operand +#CHECK: vstrsh %v0, %v0, %v0, %v0, -1 +#CHECK: error: invalid operand +#CHECK: vstrsh %v0, %v0, %v0, %v0, 16 +#CHECK: error: too few operands +#CHECK: vstrsh %v0, %v0, %v0 +#CHECK: error: invalid operand +#CHECK: vstrsh %v0, %v0, %v0, %v0, 0, 0 + + vstrsh %v0, %v0, %v0, %v0, -1 + vstrsh %v0, %v0, %v0, %v0, 16 + vstrsh %v0, %v0, %v0 + vstrsh %v0, %v0, %v0, %v0, 0, 0 + +#CHECK: error: invalid operand +#CHECK: vstrszb %v0, %v0, %v0, %v0, -1 +#CHECK: error: invalid operand +#CHECK: vstrszb %v0, %v0, %v0, %v0, 16 +#CHECK: error: too few operands +#CHECK: vstrszb %v0, %v0, %v0 +#CHECK: error: invalid operand +#CHECK: vstrszb %v0, %v0, %v0, %v0, 0, 0 + + vstrszb %v0, %v0, %v0, %v0, -1 + vstrszb %v0, %v0, %v0, %v0, 16 + vstrszb %v0, %v0, %v0 + vstrszb %v0, %v0, %v0, %v0, 0, 0 + +#CHECK: error: invalid operand +#CHECK: vstrszf %v0, %v0, %v0, %v0, -1 +#CHECK: error: invalid operand +#CHECK: vstrszf %v0, %v0, %v0, %v0, 16 +#CHECK: error: too few operands +#CHECK: vstrszf %v0, %v0, %v0 +#CHECK: error: invalid operand +#CHECK: vstrszf %v0, %v0, %v0, %v0, 0, 0 + + vstrszf %v0, %v0, %v0, %v0, -1 + vstrszf %v0, %v0, %v0, %v0, 16 + vstrszf %v0, %v0, %v0 + vstrszf %v0, %v0, %v0, %v0, 0, 0 + +#CHECK: error: invalid operand +#CHECK: vstrszh %v0, %v0, %v0, %v0, -1 +#CHECK: error: invalid operand +#CHECK: vstrszh %v0, %v0, %v0, %v0, 16 +#CHECK: error: too few operands +#CHECK: vstrszh %v0, %v0, %v0 +#CHECK: error: invalid operand +#CHECK: vstrszh %v0, %v0, %v0, %v0, 0, 0 + + vstrszh %v0, %v0, %v0, %v0, -1 + vstrszh %v0, %v0, %v0, %v0, 16 + vstrszh %v0, %v0, %v0 + vstrszh %v0, %v0, %v0, %v0, 0, 0 + +#CHECK: error: invalid operand +#CHECK: wcefb %v0, %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: wcefb %v0, %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: wcefb %v0, %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: wcefb %v0, %v0, 16, 0 + + wcefb %v0, %v0, 0, -1 + wcefb %v0, %v0, 0, 16 + wcefb %v0, %v0, -1, 0 + wcefb %v0, %v0, 16, 0 + +#CHECK: error: invalid operand 
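The vstrs and vstrsz tests earlier in this file exercise the Vector String Search instructions added by the vector-enhancements facility 2. As a rough, illustrative C model of what the byte-element form computes — a simplified sketch with hypothetical names, not the architected definition; the condition-code split between full and partial matches is only approximated:

    #include <stdint.h>

    /* Simplified model of vstrsb: search a 16-byte haystack block for a
       needle of len bytes.  A match still in progress when it runs off
       the end of the block is reported as partial rather than full. */
    enum { NO_MATCH, PARTIAL_MATCH, FULL_MATCH };

    static int vstrsb_model(const uint8_t hay[16], const uint8_t needle[16],
                            unsigned len, unsigned *index) {
        for (unsigned i = 0; i < 16; i++) {
            unsigned j = 0;
            while (j < len && i + j < 16 && hay[i + j] == needle[j])
                j++;
            if (j == len)    { *index = i; return FULL_MATCH; }    /* whole needle found */
            if (i + j == 16) { *index = i; return PARTIAL_MATCH; } /* ran off the block */
        }
        *index = 16;  /* no-match is assumed to report index 16 here */
        return NO_MATCH;
    }
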
+#CHECK: wcelfb %v0, %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: wcelfb %v0, %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: wcelfb %v0, %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: wcelfb %v0, %v0, 16, 0 + + wcelfb %v0, %v0, 0, -1 + wcelfb %v0, %v0, 0, 16 + wcelfb %v0, %v0, -1, 0 + wcelfb %v0, %v0, 16, 0 + +#CHECK: error: invalid operand +#CHECK: wcfeb %v0, %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: wcfeb %v0, %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: wcfeb %v0, %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: wcfeb %v0, %v0, 16, 0 + + wcfeb %v0, %v0, 0, -1 + wcfeb %v0, %v0, 0, 16 + wcfeb %v0, %v0, -1, 0 + wcfeb %v0, %v0, 16, 0 + +#CHECK: error: invalid operand +#CHECK: wclfeb %v0, %v0, 0, -1 +#CHECK: error: invalid operand +#CHECK: wclfeb %v0, %v0, 0, 16 +#CHECK: error: invalid operand +#CHECK: wclfeb %v0, %v0, -1, 0 +#CHECK: error: invalid operand +#CHECK: wclfeb %v0, %v0, 16, 0 + + wclfeb %v0, %v0, 0, -1 + wclfeb %v0, %v0, 0, 16 + wclfeb %v0, %v0, -1, 0 + wclfeb %v0, %v0, 16, 0 + diff --git a/llvm/test/MC/SystemZ/insn-bad-z14.s b/llvm/test/MC/SystemZ/insn-bad-z14.s index 8bc736a7a1a..f3c8414af7d 100644 --- a/llvm/test/MC/SystemZ/insn-bad-z14.s +++ b/llvm/test/MC/SystemZ/insn-bad-z14.s @@ -34,6 +34,16 @@ agh %r0, -524289 agh %r0, 524288 +#CHECK: error: instruction requires: deflate-conversion +#CHECK: dfltcc %r2, %r4, %r6 + + dfltcc %r2, %r4, %r6 + +#CHECK: error: instruction requires: message-security-assist-extension9 +#CHECK: kdsa %r0, %r2 + + kdsa %r0, %r2 + #CHECK: error: invalid register pair #CHECK: kma %r1, %r2, %r4 #CHECK: error: invalid register pair @@ -109,6 +119,66 @@ msgc %r0, -524289 msgc %r0, 524288 +#CHECK: error: instruction requires: miscellaneous-extensions-3 +#CHECK: mvcrl 0, 0 + + mvcrl 0, 0 + +#CHECK: error: instruction requires: miscellaneous-extensions-3 +#CHECK: ncgrk %r0, %r0, %r0 + + ncgrk %r0, %r0, %r0 + +#CHECK: error: instruction requires: miscellaneous-extensions-3 +#CHECK: ncrk %r0, %r0, %r0 + + ncrk %r0, %r0, %r0 + +#CHECK: error: instruction requires: miscellaneous-extensions-3 +#CHECK: nngrk %r0, %r0, %r0 + + nngrk %r0, %r0, %r0 + +#CHECK: error: instruction requires: miscellaneous-extensions-3 +#CHECK: nnrk %r0, %r0, %r0 + + nnrk %r0, %r0, %r0 + +#CHECK: error: instruction requires: miscellaneous-extensions-3 +#CHECK: nogrk %r0, %r0, %r0 + + nogrk %r0, %r0, %r0 + +#CHECK: error: instruction requires: miscellaneous-extensions-3 +#CHECK: nork %r0, %r0, %r0 + + nork %r0, %r0, %r0 + +#CHECK: error: instruction requires: miscellaneous-extensions-3 +#CHECK: nxgrk %r0, %r0, %r0 + + nxgrk %r0, %r0, %r0 + +#CHECK: error: instruction requires: miscellaneous-extensions-3 +#CHECK: nxrk %r0, %r0, %r0 + + nxrk %r0, %r0, %r0 + +#CHECK: error: instruction requires: miscellaneous-extensions-3 +#CHECK: ocgrk %r0, %r0, %r0 + + ocgrk %r0, %r0, %r0 + +#CHECK: error: instruction requires: miscellaneous-extensions-3 +#CHECK: ocrk %r0, %r0, %r0 + + ocrk %r0, %r0, %r0 + +#CHECK: error: instruction requires: miscellaneous-extensions-3 +#CHECK: popcnt %r2, %r4, 1 + + popcnt %r2, %r4, 1 + #CHECK: error: invalid register pair #CHECK: prno %r1, %r2 #CHECK: error: invalid register pair @@ -117,6 +187,30 @@ prno %r1, %r2 prno %r2, %r1 +#CHECK: error: instruction requires: miscellaneous-extensions-3 +#CHECK: selgr %r0, %r0, %r0, 0 +#CHECK: error: instruction requires: miscellaneous-extensions-3 +#CHECK: selgre %r0, %r0, %r0 + + selgr %r0, %r0, %r0, 0 + selgre %r0, %r0, %r0 + +#CHECK: error: instruction requires: miscellaneous-extensions-3 
+#CHECK: selfhr %r0, %r0, %r0, 0 +#CHECK: error: instruction requires: miscellaneous-extensions-3 +#CHECK: selfhre %r0, %r0, %r0 + + selfhr %r0, %r0, %r0, 0 + selfhre %r0, %r0, %r0 + +#CHECK: error: instruction requires: miscellaneous-extensions-3 +#CHECK: selr %r0, %r0, %r0, 0 +#CHECK: error: instruction requires: miscellaneous-extensions-3 +#CHECK: selre %r0, %r0, %r0 + + selr %r0, %r0, %r0, 0 + selre %r0, %r0, %r0 + #CHECK: error: invalid operand #CHECK: sgh %r0, -524289 #CHECK: error: invalid operand @@ -125,6 +219,11 @@ sgh %r0, -524289 sgh %r0, 524288 +#CHECK: error: instruction requires: enhanced-sort +#CHECK: sortl %r2, %r4 + + sortl %r2, %r4 + #CHECK: error: invalid operand #CHECK: stgsc %r0, -524289 #CHECK: error: invalid operand @@ -147,6 +246,41 @@ vap %v0, %v0, %v0, -1, 0 vap %v0, %v0, %v0, 256, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vcefb %v0, %v0, 0, 0 + + vcefb %v0, %v0, 0, 0 + +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vcelfb %v0, %v0, 0, 0 + + vcelfb %v0, %v0, 0, 0 + +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vcfeb %v0, %v0, 0, 0 + + vcfeb %v0, %v0, 0, 0 + +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vcfpl %v0, %v0, 0, 0, 0 + + vcfpl %v0, %v0, 0, 0, 0 + +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vcfps %v0, %v0, 0, 0, 0 + + vcfps %v0, %v0, 0, 0, 0 + +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vclfeb %v0, %v0, 0, 0 + + vclfeb %v0, %v0, 0, 0 + +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vclfp %v0, %v0, 0, 0, 0 + + vclfp %v0, %v0, 0, 0, 0 + #CHECK: error: invalid operand #CHECK: vcp %v0, %v0, -1 #CHECK: error: invalid operand @@ -155,21 +289,32 @@ vcp %v0, %v0, -1 vcp %v0, %v0, 16 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vcsfp %v0, %v0, 0, 0, 0 + + vcsfp %v0, %v0, 0, 0, 0 + #CHECK: error: invalid operand #CHECK: vcvb %r0, %v0, -1 #CHECK: error: invalid operand #CHECK: vcvb %r0, %v0, 16 +#CHECK: error: instruction requires: vector-packed-decimal-enhancement +#CHECK: vcvb %r0, %v0, 0, 1 vcvb %r0, %v0, -1 vcvb %r0, %v0, 16 + vcvb %r0, %v0, 0, 1 #CHECK: error: invalid operand #CHECK: vcvbg %r0, %v0, -1 #CHECK: error: invalid operand #CHECK: vcvbg %r0, %v0, 16 +#CHECK: error: instruction requires: vector-packed-decimal-enhancement +#CHECK: vcvbg %r0, %v0, 0, 1 vcvbg %r0, %v0, -1 vcvbg %r0, %v0, 16 + vcvbg %r0, %v0, 0, 1 #CHECK: error: invalid operand #CHECK: vcvd %r0, %v0, 0, -1 @@ -408,6 +553,79 @@ vllezlf %v0, 4096 vllezlf %v0, 0(%v1,%r2) +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vlbr %v0, 0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vlbrf %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vlbrg %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vlbrh %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vlbrq %v0, 0 + + vlbr %v0, 0, 0 + vlbrf %v0, 0 + vlbrg %v0, 0 + vlbrh %v0, 0 + vlbrq %v0, 0 + +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vlbrrep %v0, 0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vlbrrepf %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vlbrrepg %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vlbrreph %v0, 0 + + vlbrrep %v0, 0, 0 + vlbrrepf %v0, 0 + vlbrrepg %v0, 0 + vlbrreph %v0, 0 + +#CHECK: error: instruction requires: 
vector-enhancements-2 +#CHECK: vlebrf %v0, 0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vlebrg %v0, 0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vlebrh %v0, 0, 0 + + vlebrf %v0, 0, 0 + vlebrg %v0, 0, 0 + vlebrh %v0, 0, 0 + +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vler %v0, 0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vlerf %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vlerg %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vlerh %v0, 0 + + vler %v0, 0, 0 + vlerf %v0, 0 + vlerg %v0, 0 + vlerh %v0, 0 + +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vllebrz %v0, 0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vllebrze %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vllebrzf %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vllebrzg %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vllebrzh %v0, 0 + + vllebrz %v0, 0, 0 + vllebrze %v0, 0 + vllebrzf %v0, 0 + vllebrzg %v0, 0 + vllebrzh %v0, 0 + #CHECK: error: invalid operand #CHECK: vlrl %v0, 0, -1 #CHECK: error: invalid operand @@ -551,6 +769,11 @@ vsdp %v0, %v0, %v0, -1, 0 vsdp %v0, %v0, %v0, 256, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vsld %v0, %v0, %v0, 0 + + vsld %v0, %v0, %v0, 0 + #CHECK: error: invalid operand #CHECK: vsp %v0, %v0, %v0, 0, -1 #CHECK: error: invalid operand @@ -565,6 +788,11 @@ vsp %v0, %v0, %v0, -1, 0 vsp %v0, %v0, %v0, 256, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vsrd %v0, %v0, %v0, 0 + + vsrd %v0, %v0, %v0, 0 + #CHECK: error: invalid operand #CHECK: vsrp %v0, %v0, 0, 0, -1 #CHECK: error: invalid operand @@ -585,6 +813,48 @@ vsrp %v0, %v0, -1, 0, 0 vsrp %v0, %v0, 256, 0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vstbr %v0, 0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vstbrf %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vstbrg %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vstbrh %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vstbrq %v0, 0 + + vstbr %v0, 0, 0 + vstbrf %v0, 0 + vstbrg %v0, 0 + vstbrh %v0, 0 + vstbrq %v0, 0 + +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vstebrf %v0, 0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vstebrg %v0, 0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vstebrh %v0, 0, 0 + + vstebrf %v0, 0, 0 + vstebrg %v0, 0, 0 + vstebrh %v0, 0, 0 + +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vster %v0, 0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vsterf %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vsterg %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vsterh %v0, 0 + + vster %v0, 0, 0 + vsterf %v0, 0 + vsterg %v0, 0 + vsterh %v0, 0 + #CHECK: error: invalid operand #CHECK: vstrl %v0, 0, -1 #CHECK: error: invalid operand @@ -613,6 +883,29 @@ vstrlr %v0, %r0, 4096 vstrlr %v0, %r0, 0(%r0) +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vstrs %v0, %v0, %v0, %v0, 0 +#CHECK: error: instruction requires: vector-enhancements-2 +#CHECK: vstrsb %v0, %v0, %v0, %v0 +#CHECK: error: instruction requires: 
vector-enhancements-2
+#CHECK: vstrsf %v0, %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstrsh %v0, %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstrszb %v0, %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstrszf %v0, %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstrszh %v0, %v0, %v0, %v0
+
+    vstrs %v0, %v0, %v0, %v0, 0
+    vstrsb %v0, %v0, %v0, %v0
+    vstrsf %v0, %v0, %v0, %v0
+    vstrsh %v0, %v0, %v0, %v0
+    vstrszb %v0, %v0, %v0, %v0
+    vstrszf %v0, %v0, %v0, %v0
+    vstrszh %v0, %v0, %v0, %v0
+
 #CHECK: error: invalid operand
 #CHECK: vupkz %v0, 0, -1
 #CHECK: error: invalid operand
@@ -630,6 +923,26 @@
     vupkz %v0, 4096, 0
     vupkz %v0, 0(%r0), 0
 
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: wcefb %v0, %v0, 0, 0
+
+    wcefb %v0, %v0, 0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: wcelfb %v0, %v0, 0, 0
+
+    wcelfb %v0, %v0, 0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: wcfeb %v0, %v0, 0, 0
+
+    wcfeb %v0, %v0, 0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: wclfeb %v0, %v0, 0, 0
+
+    wclfeb %v0, %v0, 0, 0
+
 #CHECK: error: invalid operand
 #CHECK: wfisb %v0, %v0, 0, -1
 #CHECK: error: invalid operand
diff --git a/llvm/test/MC/SystemZ/insn-good-arch13.s b/llvm/test/MC/SystemZ/insn-good-arch13.s
new file mode 100644
index 00000000000..0bbb8a54638
--- /dev/null
+++ b/llvm/test/MC/SystemZ/insn-good-arch13.s
@@ -0,0 +1,1344 @@
+# For arch13 and above.
+# RUN: llvm-mc -triple s390x-linux-gnu -mcpu=arch13 -show-encoding %s \
+# RUN:   | FileCheck %s
+
+#CHECK: dfltcc %r2, %r2, %r2 # encoding: [0xb9,0x39,0x20,0x22]
+#CHECK: dfltcc %r2, %r8, %r15 # encoding: [0xb9,0x39,0xf0,0x28]
+#CHECK: dfltcc %r14, %r8, %r2 # encoding: [0xb9,0x39,0x20,0xe8]
+#CHECK: dfltcc %r6, %r8, %r10 # encoding: [0xb9,0x39,0xa0,0x68]
+
+    dfltcc %r2, %r2, %r2
+    dfltcc %r2, %r8, %r15
+    dfltcc %r14, %r8, %r2
+    dfltcc %r6, %r8, %r10
+
+#CHECK: kdsa %r0, %r2 # encoding: [0xb9,0x3a,0x00,0x02]
+#CHECK: kdsa %r0, %r14 # encoding: [0xb9,0x3a,0x00,0x0e]
+#CHECK: kdsa %r15, %r2 # encoding: [0xb9,0x3a,0x00,0xf2]
+#CHECK: kdsa %r7, %r10 # encoding: [0xb9,0x3a,0x00,0x7a]
+
+    kdsa %r0, %r2
+    kdsa %r0, %r14
+    kdsa %r15, %r2
+    kdsa %r7, %r10
+
+#CHECK: vllebrzg %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x04]
+#CHECK: vllebrzg %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x30,0x04]
+#CHECK: vllebrzg %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x30,0x04]
+#CHECK: vllebrzg %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x30,0x04]
+#CHECK: vllebrzg %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x30,0x04]
+#CHECK: vllebrzg %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x38,0x04]
+#CHECK: vllebrzg %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x38,0x04]
+
+    ldrv %f0, 0
+    ldrv %f0, 4095
+    ldrv %f0, 0(%r15)
+    ldrv %f0, 0(%r15,%r1)
+    ldrv %f15, 0
+    ldrv %v31, 0
+    ldrv %v18, 0x567(%r3,%r4)
+
+#CHECK: vllebrze %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x60,0x04]
+#CHECK: vllebrze %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x60,0x04]
+#CHECK: vllebrze %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x60,0x04]
+#CHECK: vllebrze %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x60,0x04]
+#CHECK: vllebrze %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x60,0x04]
+#CHECK: vllebrze %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x68,0x04]
+#CHECK: vllebrze %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x68,0x04]
+
+    lerv %f0, 0
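As the checks above show, ldrv and lerv are accepted as aliases of vllebrzg and vllebrze: load a single byte-reversed element, zeroing the rest of the vector register. A minimal C sketch of the assumed word-element semantics (the element is taken to land in the lane that overlays the FPR):

    #include <stdint.h>
    #include <string.h>

    typedef struct { uint8_t b[16]; } V128;

    /* Assumed model of lerv/vllebrze: byte-swap a 32-bit value from
       memory into the leftmost word element, zeroing everything else. */
    static V128 lerv_model(const void *mem) {
        V128 v;
        uint32_t x;
        memset(v.b, 0, sizeof v.b);   /* all other vector bytes become zero */
        memcpy(&x, mem, sizeof x);    /* unaligned-safe 32-bit load */
        x = __builtin_bswap32(x);     /* the byte-reversed ("br") step */
        memcpy(v.b, &x, sizeof x);    /* place in element 0 */
        return v;
    }
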
lerv %f0, 4095 + lerv %f0, 0(%r15) + lerv %f0, 0(%r15,%r1) + lerv %f15, 0 + lerv %v31, 0 + lerv %v18, 0x567(%r3,%r4) + +#CHECK: mvcrl 0, 0 # encoding: [0xe5,0x0a,0x00,0x00,0x00,0x00] +#CHECK: mvcrl 0(%r1), 0(%r2) # encoding: [0xe5,0x0a,0x10,0x00,0x20,0x00] +#CHECK: mvcrl 160(%r1), 320(%r15) # encoding: [0xe5,0x0a,0x10,0xa0,0xf1,0x40] +#CHECK: mvcrl 0(%r1), 4095 # encoding: [0xe5,0x0a,0x10,0x00,0x0f,0xff] +#CHECK: mvcrl 0(%r1), 4095(%r2) # encoding: [0xe5,0x0a,0x10,0x00,0x2f,0xff] +#CHECK: mvcrl 0(%r1), 4095(%r15) # encoding: [0xe5,0x0a,0x10,0x00,0xff,0xff] +#CHECK: mvcrl 0(%r1), 0 # encoding: [0xe5,0x0a,0x10,0x00,0x00,0x00] +#CHECK: mvcrl 0(%r15), 0 # encoding: [0xe5,0x0a,0xf0,0x00,0x00,0x00] +#CHECK: mvcrl 4095(%r1), 0 # encoding: [0xe5,0x0a,0x1f,0xff,0x00,0x00] +#CHECK: mvcrl 4095(%r15), 0 # encoding: [0xe5,0x0a,0xff,0xff,0x00,0x00] + + mvcrl 0, 0 + mvcrl 0(%r1), 0(%r2) + mvcrl 160(%r1), 320(%r15) + mvcrl 0(%r1), 4095 + mvcrl 0(%r1), 4095(%r2) + mvcrl 0(%r1), 4095(%r15) + mvcrl 0(%r1), 0 + mvcrl 0(%r15), 0 + mvcrl 4095(%r1), 0 + mvcrl 4095(%r15), 0 + +#CHECK: ncgrk %r0, %r0, %r0 # encoding: [0xb9,0xe5,0x00,0x00] +#CHECK: ncgrk %r0, %r0, %r15 # encoding: [0xb9,0xe5,0xf0,0x00] +#CHECK: ncgrk %r0, %r15, %r0 # encoding: [0xb9,0xe5,0x00,0x0f] +#CHECK: ncgrk %r15, %r0, %r0 # encoding: [0xb9,0xe5,0x00,0xf0] +#CHECK: ncgrk %r7, %r8, %r9 # encoding: [0xb9,0xe5,0x90,0x78] + + ncgrk %r0,%r0,%r0 + ncgrk %r0,%r0,%r15 + ncgrk %r0,%r15,%r0 + ncgrk %r15,%r0,%r0 + ncgrk %r7,%r8,%r9 + +#CHECK: ncrk %r0, %r0, %r0 # encoding: [0xb9,0xf5,0x00,0x00] +#CHECK: ncrk %r0, %r0, %r15 # encoding: [0xb9,0xf5,0xf0,0x00] +#CHECK: ncrk %r0, %r15, %r0 # encoding: [0xb9,0xf5,0x00,0x0f] +#CHECK: ncrk %r15, %r0, %r0 # encoding: [0xb9,0xf5,0x00,0xf0] +#CHECK: ncrk %r7, %r8, %r9 # encoding: [0xb9,0xf5,0x90,0x78] + + ncrk %r0,%r0,%r0 + ncrk %r0,%r0,%r15 + ncrk %r0,%r15,%r0 + ncrk %r15,%r0,%r0 + ncrk %r7,%r8,%r9 + +#CHECK: nngrk %r0, %r0, %r0 # encoding: [0xb9,0x64,0x00,0x00] +#CHECK: nngrk %r0, %r0, %r15 # encoding: [0xb9,0x64,0xf0,0x00] +#CHECK: nngrk %r0, %r15, %r0 # encoding: [0xb9,0x64,0x00,0x0f] +#CHECK: nngrk %r15, %r0, %r0 # encoding: [0xb9,0x64,0x00,0xf0] +#CHECK: nngrk %r7, %r8, %r9 # encoding: [0xb9,0x64,0x90,0x78] + + nngrk %r0,%r0,%r0 + nngrk %r0,%r0,%r15 + nngrk %r0,%r15,%r0 + nngrk %r15,%r0,%r0 + nngrk %r7,%r8,%r9 + +#CHECK: nnrk %r0, %r0, %r0 # encoding: [0xb9,0x74,0x00,0x00] +#CHECK: nnrk %r0, %r0, %r15 # encoding: [0xb9,0x74,0xf0,0x00] +#CHECK: nnrk %r0, %r15, %r0 # encoding: [0xb9,0x74,0x00,0x0f] +#CHECK: nnrk %r15, %r0, %r0 # encoding: [0xb9,0x74,0x00,0xf0] +#CHECK: nnrk %r7, %r8, %r9 # encoding: [0xb9,0x74,0x90,0x78] + + nnrk %r0,%r0,%r0 + nnrk %r0,%r0,%r15 + nnrk %r0,%r15,%r0 + nnrk %r15,%r0,%r0 + nnrk %r7,%r8,%r9 + +#CHECK: nogrk %r0, %r0, %r0 # encoding: [0xb9,0x66,0x00,0x00] +#CHECK: nogrk %r0, %r0, %r15 # encoding: [0xb9,0x66,0xf0,0x00] +#CHECK: nogrk %r0, %r15, %r0 # encoding: [0xb9,0x66,0x00,0x0f] +#CHECK: nogrk %r15, %r0, %r0 # encoding: [0xb9,0x66,0x00,0xf0] +#CHECK: nogrk %r7, %r8, %r9 # encoding: [0xb9,0x66,0x90,0x78] + + nogrk %r0,%r0,%r0 + nogrk %r0,%r0,%r15 + nogrk %r0,%r15,%r0 + nogrk %r15,%r0,%r0 + nogrk %r7,%r8,%r9 + +#CHECK: nork %r0, %r0, %r0 # encoding: [0xb9,0x76,0x00,0x00] +#CHECK: nork %r0, %r0, %r15 # encoding: [0xb9,0x76,0xf0,0x00] +#CHECK: nork %r0, %r15, %r0 # encoding: [0xb9,0x76,0x00,0x0f] +#CHECK: nork %r15, %r0, %r0 # encoding: [0xb9,0x76,0x00,0xf0] +#CHECK: nork %r7, %r8, %r9 # encoding: [0xb9,0x76,0x90,0x78] + + nork %r0,%r0,%r0 + nork %r0,%r0,%r15 + nork %r0,%r15,%r0 + nork 
%r15,%r0,%r0 + nork %r7,%r8,%r9 + +#CHECK: nxgrk %r0, %r0, %r0 # encoding: [0xb9,0x67,0x00,0x00] +#CHECK: nxgrk %r0, %r0, %r15 # encoding: [0xb9,0x67,0xf0,0x00] +#CHECK: nxgrk %r0, %r15, %r0 # encoding: [0xb9,0x67,0x00,0x0f] +#CHECK: nxgrk %r15, %r0, %r0 # encoding: [0xb9,0x67,0x00,0xf0] +#CHECK: nxgrk %r7, %r8, %r9 # encoding: [0xb9,0x67,0x90,0x78] + + nxgrk %r0,%r0,%r0 + nxgrk %r0,%r0,%r15 + nxgrk %r0,%r15,%r0 + nxgrk %r15,%r0,%r0 + nxgrk %r7,%r8,%r9 + +#CHECK: nxrk %r0, %r0, %r0 # encoding: [0xb9,0x77,0x00,0x00] +#CHECK: nxrk %r0, %r0, %r15 # encoding: [0xb9,0x77,0xf0,0x00] +#CHECK: nxrk %r0, %r15, %r0 # encoding: [0xb9,0x77,0x00,0x0f] +#CHECK: nxrk %r15, %r0, %r0 # encoding: [0xb9,0x77,0x00,0xf0] +#CHECK: nxrk %r7, %r8, %r9 # encoding: [0xb9,0x77,0x90,0x78] + + nxrk %r0,%r0,%r0 + nxrk %r0,%r0,%r15 + nxrk %r0,%r15,%r0 + nxrk %r15,%r0,%r0 + nxrk %r7,%r8,%r9 + +#CHECK: ocgrk %r0, %r0, %r0 # encoding: [0xb9,0x65,0x00,0x00] +#CHECK: ocgrk %r0, %r0, %r15 # encoding: [0xb9,0x65,0xf0,0x00] +#CHECK: ocgrk %r0, %r15, %r0 # encoding: [0xb9,0x65,0x00,0x0f] +#CHECK: ocgrk %r15, %r0, %r0 # encoding: [0xb9,0x65,0x00,0xf0] +#CHECK: ocgrk %r7, %r8, %r9 # encoding: [0xb9,0x65,0x90,0x78] + + ocgrk %r0,%r0,%r0 + ocgrk %r0,%r0,%r15 + ocgrk %r0,%r15,%r0 + ocgrk %r15,%r0,%r0 + ocgrk %r7,%r8,%r9 + +#CHECK: ocrk %r0, %r0, %r0 # encoding: [0xb9,0x75,0x00,0x00] +#CHECK: ocrk %r0, %r0, %r15 # encoding: [0xb9,0x75,0xf0,0x00] +#CHECK: ocrk %r0, %r15, %r0 # encoding: [0xb9,0x75,0x00,0x0f] +#CHECK: ocrk %r15, %r0, %r0 # encoding: [0xb9,0x75,0x00,0xf0] +#CHECK: ocrk %r7, %r8, %r9 # encoding: [0xb9,0x75,0x90,0x78] + + ocrk %r0,%r0,%r0 + ocrk %r0,%r0,%r15 + ocrk %r0,%r15,%r0 + ocrk %r15,%r0,%r0 + ocrk %r7,%r8,%r9 + +#CHECK: popcnt %r0, %r0 # encoding: [0xb9,0xe1,0x00,0x00] +#CHECK: popcnt %r0, %r15 # encoding: [0xb9,0xe1,0x00,0x0f] +#CHECK: popcnt %r14, %r0 # encoding: [0xb9,0xe1,0x00,0xe0] +#CHECK: popcnt %r6, %r8 # encoding: [0xb9,0xe1,0x00,0x68] +#CHECK: popcnt %r4, %r13, 1 # encoding: [0xb9,0xe1,0x10,0x4d] +#CHECK: popcnt %r4, %r13, 15 # encoding: [0xb9,0xe1,0xf0,0x4d] + + popcnt %r0, %r0 + popcnt %r0, %r15 + popcnt %r14, %r0 + popcnt %r6, %r8 + popcnt %r4, %r13, 1 + popcnt %r4, %r13, 15 + +#CHECK: selgr %r0, %r0, %r0, 0 # encoding: [0xb9,0xe3,0x00,0x00] +#CHECK: selgr %r0, %r0, %r0, 15 # encoding: [0xb9,0xe3,0x0f,0x00] +#CHECK: selgr %r0, %r0, %r15, 0 # encoding: [0xb9,0xe3,0xf0,0x00] +#CHECK: selgr %r0, %r15, %r0, 0 # encoding: [0xb9,0xe3,0x00,0x0f] +#CHECK: selgr %r15, %r0, %r0, 0 # encoding: [0xb9,0xe3,0x00,0xf0] +#CHECK: selgr %r7, %r8, %r9, 10 # encoding: [0xb9,0xe3,0x9a,0x78] + + selgr %r0, %r0, %r0, 0 + selgr %r0, %r0, %r0, 15 + selgr %r0, %r0, %r15, 0 + selgr %r0, %r15, %r0, 0 + selgr %r15, %r0, %r0, 0 + selgr %r7, %r8, %r9, 10 + +#CHECK: selgro %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x31,0x12] +#CHECK: selgrh %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x32,0x12] +#CHECK: selgrp %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x32,0x12] +#CHECK: selgrnle %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x33,0x12] +#CHECK: selgrl %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x34,0x12] +#CHECK: selgrm %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x34,0x12] +#CHECK: selgrnhe %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x35,0x12] +#CHECK: selgrlh %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x36,0x12] +#CHECK: selgrne %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x37,0x12] +#CHECK: selgrnz %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x37,0x12] +#CHECK: selgre %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x38,0x12] +#CHECK: selgrz %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x38,0x12] +#CHECK: selgrnlh 
%r1, %r2, %r3 # encoding: [0xb9,0xe3,0x39,0x12] +#CHECK: selgrhe %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x3a,0x12] +#CHECK: selgrnl %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x3b,0x12] +#CHECK: selgrnm %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x3b,0x12] +#CHECK: selgrle %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x3c,0x12] +#CHECK: selgrnh %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x3d,0x12] +#CHECK: selgrnp %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x3d,0x12] +#CHECK: selgrno %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x3e,0x12] + + selgro %r1, %r2, %r3 + selgrh %r1, %r2, %r3 + selgrp %r1, %r2, %r3 + selgrnle %r1, %r2, %r3 + selgrl %r1, %r2, %r3 + selgrm %r1, %r2, %r3 + selgrnhe %r1, %r2, %r3 + selgrlh %r1, %r2, %r3 + selgrne %r1, %r2, %r3 + selgrnz %r1, %r2, %r3 + selgre %r1, %r2, %r3 + selgrz %r1, %r2, %r3 + selgrnlh %r1, %r2, %r3 + selgrhe %r1, %r2, %r3 + selgrnl %r1, %r2, %r3 + selgrnm %r1, %r2, %r3 + selgrle %r1, %r2, %r3 + selgrnh %r1, %r2, %r3 + selgrnp %r1, %r2, %r3 + selgrno %r1, %r2, %r3 + +#CHECK: selfhr %r0, %r0, %r0, 0 # encoding: [0xb9,0xc0,0x00,0x00] +#CHECK: selfhr %r0, %r0, %r0, 15 # encoding: [0xb9,0xc0,0x0f,0x00] +#CHECK: selfhr %r0, %r0, %r15, 0 # encoding: [0xb9,0xc0,0xf0,0x00] +#CHECK: selfhr %r0, %r15, %r0, 0 # encoding: [0xb9,0xc0,0x00,0x0f] +#CHECK: selfhr %r15, %r0, %r0, 0 # encoding: [0xb9,0xc0,0x00,0xf0] +#CHECK: selfhr %r7, %r8, %r9, 10 # encoding: [0xb9,0xc0,0x9a,0x78] + + selfhr %r0, %r0, %r0, 0 + selfhr %r0, %r0, %r0, 15 + selfhr %r0, %r0, %r15, 0 + selfhr %r0, %r15, %r0, 0 + selfhr %r15, %r0, %r0, 0 + selfhr %r7, %r8, %r9, 10 + +#CHECK: selfhro %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x31,0x12] +#CHECK: selfhrh %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x32,0x12] +#CHECK: selfhrp %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x32,0x12] +#CHECK: selfhrnle %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x33,0x12] +#CHECK: selfhrl %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x34,0x12] +#CHECK: selfhrm %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x34,0x12] +#CHECK: selfhrnhe %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x35,0x12] +#CHECK: selfhrlh %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x36,0x12] +#CHECK: selfhrne %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x37,0x12] +#CHECK: selfhrnz %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x37,0x12] +#CHECK: selfhre %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x38,0x12] +#CHECK: selfhrz %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x38,0x12] +#CHECK: selfhrnlh %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x39,0x12] +#CHECK: selfhrhe %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x3a,0x12] +#CHECK: selfhrnl %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x3b,0x12] +#CHECK: selfhrnm %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x3b,0x12] +#CHECK: selfhrle %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x3c,0x12] +#CHECK: selfhrnh %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x3d,0x12] +#CHECK: selfhrnp %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x3d,0x12] +#CHECK: selfhrno %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x3e,0x12] + + selfhro %r1, %r2, %r3 + selfhrh %r1, %r2, %r3 + selfhrp %r1, %r2, %r3 + selfhrnle %r1, %r2, %r3 + selfhrl %r1, %r2, %r3 + selfhrm %r1, %r2, %r3 + selfhrnhe %r1, %r2, %r3 + selfhrlh %r1, %r2, %r3 + selfhrne %r1, %r2, %r3 + selfhrnz %r1, %r2, %r3 + selfhre %r1, %r2, %r3 + selfhrz %r1, %r2, %r3 + selfhrnlh %r1, %r2, %r3 + selfhrhe %r1, %r2, %r3 + selfhrnl %r1, %r2, %r3 + selfhrnm %r1, %r2, %r3 + selfhrle %r1, %r2, %r3 + selfhrnh %r1, %r2, %r3 + selfhrnp %r1, %r2, %r3 + selfhrno %r1, %r2, %r3 + +#CHECK: selr %r0, %r0, %r0, 0 # encoding: [0xb9,0xf0,0x00,0x00] +#CHECK: selr %r0, %r0, %r0, 15 # encoding: [0xb9,0xf0,0x0f,0x00] +#CHECK: selr %r0, %r0, %r15, 0 # encoding: 
[0xb9,0xf0,0xf0,0x00] +#CHECK: selr %r0, %r15, %r0, 0 # encoding: [0xb9,0xf0,0x00,0x0f] +#CHECK: selr %r15, %r0, %r0, 0 # encoding: [0xb9,0xf0,0x00,0xf0] +#CHECK: selr %r7, %r8, %r9, 10 # encoding: [0xb9,0xf0,0x9a,0x78] + + selr %r0, %r0, %r0, 0 + selr %r0, %r0, %r0, 15 + selr %r0, %r0, %r15, 0 + selr %r0, %r15, %r0, 0 + selr %r15, %r0, %r0, 0 + selr %r7, %r8, %r9, 10 + +#CHECK: selro %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x31,0x12] +#CHECK: selrh %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x32,0x12] +#CHECK: selrp %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x32,0x12] +#CHECK: selrnle %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x33,0x12] +#CHECK: selrl %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x34,0x12] +#CHECK: selrm %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x34,0x12] +#CHECK: selrnhe %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x35,0x12] +#CHECK: selrlh %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x36,0x12] +#CHECK: selrne %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x37,0x12] +#CHECK: selrnz %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x37,0x12] +#CHECK: selre %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x38,0x12] +#CHECK: selrz %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x38,0x12] +#CHECK: selrnlh %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x39,0x12] +#CHECK: selrhe %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x3a,0x12] +#CHECK: selrnl %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x3b,0x12] +#CHECK: selrnm %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x3b,0x12] +#CHECK: selrle %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x3c,0x12] +#CHECK: selrnh %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x3d,0x12] +#CHECK: selrnp %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x3d,0x12] +#CHECK: selrno %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x3e,0x12] + + selro %r1, %r2, %r3 + selrh %r1, %r2, %r3 + selrp %r1, %r2, %r3 + selrnle %r1, %r2, %r3 + selrl %r1, %r2, %r3 + selrm %r1, %r2, %r3 + selrnhe %r1, %r2, %r3 + selrlh %r1, %r2, %r3 + selrne %r1, %r2, %r3 + selrnz %r1, %r2, %r3 + selre %r1, %r2, %r3 + selrz %r1, %r2, %r3 + selrnlh %r1, %r2, %r3 + selrhe %r1, %r2, %r3 + selrnl %r1, %r2, %r3 + selrnm %r1, %r2, %r3 + selrle %r1, %r2, %r3 + selrnh %r1, %r2, %r3 + selrnp %r1, %r2, %r3 + selrno %r1, %r2, %r3 + +#CHECK: sortl %r2, %r2 # encoding: [0xb9,0x38,0x00,0x22] +#CHECK: sortl %r2, %r14 # encoding: [0xb9,0x38,0x00,0x2e] +#CHECK: sortl %r14, %r2 # encoding: [0xb9,0x38,0x00,0xe2] +#CHECK: sortl %r6, %r10 # encoding: [0xb9,0x38,0x00,0x6a] + + sortl %r2, %r2 + sortl %r2, %r14 + sortl %r14, %r2 + sortl %r6, %r10 + +#CHECK: vstebrg %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x0a] +#CHECK: vstebrg %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x0a] +#CHECK: vstebrg %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x0a] +#CHECK: vstebrg %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x0a] +#CHECK: vstebrg %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x0a] +#CHECK: vstebrg %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x0a] +#CHECK: vstebrg %v18, 1383(%r3,%r4), 0 # encoding: [0xe6,0x23,0x45,0x67,0x08,0x0a] + + stdrv %f0, 0 + stdrv %f0, 4095 + stdrv %f0, 0(%r15) + stdrv %f0, 0(%r15,%r1) + stdrv %f15, 0 + stdrv %v31, 0 + stdrv %v18, 0x567(%r3,%r4) + +#CHECK: vstebrf %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x0b] +#CHECK: vstebrf %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x0b] +#CHECK: vstebrf %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x0b] +#CHECK: vstebrf %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x0b] +#CHECK: vstebrf %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x0b] +#CHECK: vstebrf %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x0b] +#CHECK: 
vstebrf %v18, 1383(%r3,%r4), 0 # encoding: [0xe6,0x23,0x45,0x67,0x08,0x0b] + + sterv %f0, 0 + sterv %f0, 4095 + sterv %f0, 0(%r15) + sterv %f0, 0(%r15,%r1) + sterv %f15, 0 + sterv %v31, 0 + sterv %v18, 0x567(%r3,%r4) + +#CHECK: vcefb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xc3] +#CHECK: vcefb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x20,0xc3] +#CHECK: vcefb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x20,0xc3] +#CHECK: vcefb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc3] +#CHECK: vcefb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xc3] +#CHECK: vcefb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xc3] +#CHECK: vcefb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x24,0xc3] + + vcefb %v0, %v0, 0, 0 + vcefb %v0, %v0, 0, 15 + vcefb %v0, %v0, 4, 0 + vcefb %v0, %v0, 12, 0 + vcefb %v0, %v31, 0, 0 + vcefb %v31, %v0, 0, 0 + vcefb %v14, %v17, 4, 10 + +#CHECK: vcelfb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xc1] +#CHECK: vcelfb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x20,0xc1] +#CHECK: vcelfb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x20,0xc1] +#CHECK: vcelfb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc1] +#CHECK: vcelfb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xc1] +#CHECK: vcelfb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xc1] +#CHECK: vcelfb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x24,0xc1] + + vcelfb %v0, %v0, 0, 0 + vcelfb %v0, %v0, 0, 15 + vcelfb %v0, %v0, 4, 0 + vcelfb %v0, %v0, 12, 0 + vcelfb %v0, %v31, 0, 0 + vcelfb %v31, %v0, 0, 0 + vcelfb %v14, %v17, 4, 10 + +#CHECK: vcfeb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xc2] +#CHECK: vcfeb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x20,0xc2] +#CHECK: vcfeb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x20,0xc2] +#CHECK: vcfeb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc2] +#CHECK: vcfeb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xc2] +#CHECK: vcfeb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xc2] +#CHECK: vcfeb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x24,0xc2] + + vcfeb %v0, %v0, 0, 0 + vcfeb %v0, %v0, 0, 15 + vcfeb %v0, %v0, 4, 0 + vcfeb %v0, %v0, 12, 0 + vcfeb %v0, %v31, 0, 0 + vcfeb %v31, %v0, 0, 0 + vcfeb %v14, %v17, 4, 10 + +#CHECK: vcfpl %v0, %v0, 0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xc1] +#CHECK: vcfpl %v0, %v0, 15, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0xc1] +#CHECK: vcfpl %v0, %v0, 0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x00,0xc1] +#CHECK: vcfpl %v0, %v0, 0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x00,0xc1] +#CHECK: vcfpl %v0, %v0, 0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x00,0xc1] +#CHECK: vcfpl %v0, %v31, 0, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xc1] +#CHECK: vcfpl %v31, %v0, 0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xc1] +#CHECK: vcfpl %v14, %v17, 11, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0xb4,0xc1] + + vcfpl %v0, %v0, 0, 0, 0 + vcfpl %v0, %v0, 15, 0, 0 + vcfpl %v0, %v0, 0, 0, 15 + vcfpl %v0, %v0, 0, 4, 0 + vcfpl %v0, %v0, 0, 12, 0 + vcfpl %v0, %v31, 0, 0, 0 + vcfpl %v31, %v0, 0, 0, 0 + vcfpl %v14, %v17, 11, 4, 10 + +#CHECK: vcfps %v0, %v0, 0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xc3] +#CHECK: vcfps %v0, %v0, 15, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0xc3] +#CHECK: vcfps %v0, %v0, 0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x00,0xc3] +#CHECK: vcfps %v0, %v0, 0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x00,0xc3] +#CHECK: vcfps %v0, %v0, 0, 12, 0 # encoding: 
[0xe7,0x00,0x00,0x0c,0x00,0xc3] +#CHECK: vcfps %v0, %v31, 0, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xc3] +#CHECK: vcfps %v31, %v0, 0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xc3] +#CHECK: vcfps %v14, %v17, 11, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0xb4,0xc3] + + vcfps %v0, %v0, 0, 0, 0 + vcfps %v0, %v0, 15, 0, 0 + vcfps %v0, %v0, 0, 0, 15 + vcfps %v0, %v0, 0, 4, 0 + vcfps %v0, %v0, 0, 12, 0 + vcfps %v0, %v31, 0, 0, 0 + vcfps %v31, %v0, 0, 0, 0 + vcfps %v14, %v17, 11, 4, 10 + +#CHECK: vclfeb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xc0] +#CHECK: vclfeb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x20,0xc0] +#CHECK: vclfeb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x20,0xc0] +#CHECK: vclfeb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc0] +#CHECK: vclfeb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xc0] +#CHECK: vclfeb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xc0] +#CHECK: vclfeb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x24,0xc0] + + vclfeb %v0, %v0, 0, 0 + vclfeb %v0, %v0, 0, 15 + vclfeb %v0, %v0, 4, 0 + vclfeb %v0, %v0, 12, 0 + vclfeb %v0, %v31, 0, 0 + vclfeb %v31, %v0, 0, 0 + vclfeb %v14, %v17, 4, 10 + +#CHECK: vclfp %v0, %v0, 0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xc0] +#CHECK: vclfp %v0, %v0, 15, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0xc0] +#CHECK: vclfp %v0, %v0, 0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x00,0xc0] +#CHECK: vclfp %v0, %v0, 0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x00,0xc0] +#CHECK: vclfp %v0, %v0, 0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x00,0xc0] +#CHECK: vclfp %v0, %v31, 0, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xc0] +#CHECK: vclfp %v31, %v0, 0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xc0] +#CHECK: vclfp %v14, %v17, 11, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0xb4,0xc0] + + vclfp %v0, %v0, 0, 0, 0 + vclfp %v0, %v0, 15, 0, 0 + vclfp %v0, %v0, 0, 0, 15 + vclfp %v0, %v0, 0, 4, 0 + vclfp %v0, %v0, 0, 12, 0 + vclfp %v0, %v31, 0, 0, 0 + vclfp %v31, %v0, 0, 0, 0 + vclfp %v14, %v17, 11, 4, 10 + +#CHECK: vcsfp %v0, %v0, 0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xc2] +#CHECK: vcsfp %v0, %v0, 15, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0xc2] +#CHECK: vcsfp %v0, %v0, 0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x00,0xc2] +#CHECK: vcsfp %v0, %v0, 0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x00,0xc2] +#CHECK: vcsfp %v0, %v0, 0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x00,0xc2] +#CHECK: vcsfp %v0, %v31, 0, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xc2] +#CHECK: vcsfp %v31, %v0, 0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xc2] +#CHECK: vcsfp %v14, %v17, 11, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0xb4,0xc2] + + vcsfp %v0, %v0, 0, 0, 0 + vcsfp %v0, %v0, 15, 0, 0 + vcsfp %v0, %v0, 0, 0, 15 + vcsfp %v0, %v0, 0, 4, 0 + vcsfp %v0, %v0, 0, 12, 0 + vcsfp %v0, %v31, 0, 0, 0 + vcsfp %v31, %v0, 0, 0, 0 + vcsfp %v14, %v17, 11, 4, 10 + +#CHECK: vcvb %r0, %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0x0f,0x00,0x50] +#CHECK: vcvb %r3, %v18, 4, 6 # encoding: [0xe6,0x32,0x00,0x46,0x04,0x50] + + vcvb %r0, %v0, 0, 15 + vcvb %r3, %v18, 4, 6 + +#CHECK: vcvbg %r0, %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0x0f,0x00,0x52] +#CHECK: vcvbg %r3, %v18, 4, 6 # encoding: [0xe6,0x32,0x00,0x46,0x04,0x52] + + vcvbg %r0, %v0, 0, 15 + vcvbg %r3, %v18, 4, 6 + +#CHECK: vlbr %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x06] +#CHECK: vlbr %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0x00,0xf0,0x06] +#CHECK: vlbr %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x06] +#CHECK: vlbr %v0, 0(%r15), 0 # encoding: 
[0xe6,0x00,0xf0,0x00,0x00,0x06] +#CHECK: vlbr %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x06] +#CHECK: vlbr %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x06] +#CHECK: vlbr %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x06] +#CHECK: vlbr %v18, 1383(%r3,%r4), 11 # encoding: [0xe6,0x23,0x45,0x67,0xb8,0x06] + + vlbr %v0, 0, 0 + vlbr %v0, 0, 15 + vlbr %v0, 4095, 0 + vlbr %v0, 0(%r15), 0 + vlbr %v0, 0(%r15,%r1), 0 + vlbr %v15, 0, 0 + vlbr %v31, 0, 0 + vlbr %v18, 0x567(%r3,%r4), 11 + +#CHECK: vlbrf %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x20,0x06] +#CHECK: vlbrf %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x20,0x06] +#CHECK: vlbrf %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x20,0x06] +#CHECK: vlbrf %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x20,0x06] +#CHECK: vlbrf %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x20,0x06] +#CHECK: vlbrf %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x28,0x06] +#CHECK: vlbrf %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x28,0x06] + + vlbrf %v0, 0 + vlbrf %v0, 4095 + vlbrf %v0, 0(%r15) + vlbrf %v0, 0(%r15,%r1) + vlbrf %v15, 0 + vlbrf %v31, 0 + vlbrf %v18, 0x567(%r3,%r4) + +#CHECK: vlbrg %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x06] +#CHECK: vlbrg %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x30,0x06] +#CHECK: vlbrg %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x30,0x06] +#CHECK: vlbrg %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x30,0x06] +#CHECK: vlbrg %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x30,0x06] +#CHECK: vlbrg %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x38,0x06] +#CHECK: vlbrg %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x38,0x06] + + vlbrg %v0, 0 + vlbrg %v0, 4095 + vlbrg %v0, 0(%r15) + vlbrg %v0, 0(%r15,%r1) + vlbrg %v15, 0 + vlbrg %v31, 0 + vlbrg %v18, 0x567(%r3,%r4) + +#CHECK: vlbrh %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x10,0x06] +#CHECK: vlbrh %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x10,0x06] +#CHECK: vlbrh %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x10,0x06] +#CHECK: vlbrh %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x10,0x06] +#CHECK: vlbrh %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x10,0x06] +#CHECK: vlbrh %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x18,0x06] +#CHECK: vlbrh %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x18,0x06] + + vlbrh %v0, 0 + vlbrh %v0, 4095 + vlbrh %v0, 0(%r15) + vlbrh %v0, 0(%r15,%r1) + vlbrh %v15, 0 + vlbrh %v31, 0 + vlbrh %v18, 0x567(%r3,%r4) + +#CHECK: vlbrq %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x40,0x06] +#CHECK: vlbrq %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x40,0x06] +#CHECK: vlbrq %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x40,0x06] +#CHECK: vlbrq %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x40,0x06] +#CHECK: vlbrq %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x40,0x06] +#CHECK: vlbrq %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x48,0x06] +#CHECK: vlbrq %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x48,0x06] + + vlbrq %v0, 0 + vlbrq %v0, 4095 + vlbrq %v0, 0(%r15) + vlbrq %v0, 0(%r15,%r1) + vlbrq %v15, 0 + vlbrq %v31, 0 + vlbrq %v18, 0x567(%r3,%r4) + +#CHECK: vlbrrep %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x05] +#CHECK: vlbrrep %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0x00,0xf0,0x05] +#CHECK: vlbrrep %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x05] +#CHECK: vlbrrep %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x05] +#CHECK: vlbrrep %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x05] +#CHECK: vlbrrep %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x05] +#CHECK: vlbrrep %v31, 0, 0 # encoding: 
[0xe6,0xf0,0x00,0x00,0x08,0x05] +#CHECK: vlbrrep %v18, 1383(%r3,%r4), 11 # encoding: [0xe6,0x23,0x45,0x67,0xb8,0x05] + + vlbrrep %v0, 0, 0 + vlbrrep %v0, 0, 15 + vlbrrep %v0, 4095, 0 + vlbrrep %v0, 0(%r15), 0 + vlbrrep %v0, 0(%r15,%r1), 0 + vlbrrep %v15, 0, 0 + vlbrrep %v31, 0, 0 + vlbrrep %v18, 0x567(%r3,%r4), 11 + +#CHECK: vlbrrepf %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x20,0x05] +#CHECK: vlbrrepf %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x20,0x05] +#CHECK: vlbrrepf %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x20,0x05] +#CHECK: vlbrrepf %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x20,0x05] +#CHECK: vlbrrepf %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x20,0x05] +#CHECK: vlbrrepf %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x28,0x05] +#CHECK: vlbrrepf %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x28,0x05] + + vlbrrepf %v0, 0 + vlbrrepf %v0, 4095 + vlbrrepf %v0, 0(%r15) + vlbrrepf %v0, 0(%r15,%r1) + vlbrrepf %v15, 0 + vlbrrepf %v31, 0 + vlbrrepf %v18, 0x567(%r3,%r4) + +#CHECK: vlbrrepg %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x05] +#CHECK: vlbrrepg %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x30,0x05] +#CHECK: vlbrrepg %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x30,0x05] +#CHECK: vlbrrepg %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x30,0x05] +#CHECK: vlbrrepg %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x30,0x05] +#CHECK: vlbrrepg %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x38,0x05] +#CHECK: vlbrrepg %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x38,0x05] + + vlbrrepg %v0, 0 + vlbrrepg %v0, 4095 + vlbrrepg %v0, 0(%r15) + vlbrrepg %v0, 0(%r15,%r1) + vlbrrepg %v15, 0 + vlbrrepg %v31, 0 + vlbrrepg %v18, 0x567(%r3,%r4) + +#CHECK: vlbrreph %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x10,0x05] +#CHECK: vlbrreph %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x10,0x05] +#CHECK: vlbrreph %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x10,0x05] +#CHECK: vlbrreph %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x10,0x05] +#CHECK: vlbrreph %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x10,0x05] +#CHECK: vlbrreph %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x18,0x05] +#CHECK: vlbrreph %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x18,0x05] + + vlbrreph %v0, 0 + vlbrreph %v0, 4095 + vlbrreph %v0, 0(%r15) + vlbrreph %v0, 0(%r15,%r1) + vlbrreph %v15, 0 + vlbrreph %v31, 0 + vlbrreph %v18, 0x567(%r3,%r4) + +#CHECK: vlebrf %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x03] +#CHECK: vlebrf %v0, 0, 3 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x03] +#CHECK: vlebrf %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x03] +#CHECK: vlebrf %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x03] +#CHECK: vlebrf %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x03] +#CHECK: vlebrf %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x03] +#CHECK: vlebrf %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x03] +#CHECK: vlebrf %v18, 1383(%r3,%r4), 2 # encoding: [0xe6,0x23,0x45,0x67,0x28,0x03] + + vlebrf %v0, 0, 0 + vlebrf %v0, 0, 3 + vlebrf %v0, 4095, 0 + vlebrf %v0, 0(%r15), 0 + vlebrf %v0, 0(%r15,%r1), 0 + vlebrf %v15, 0, 0 + vlebrf %v31, 0, 0 + vlebrf %v18, 1383(%r3,%r4), 2 + +#CHECK: vlebrg %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x02] +#CHECK: vlebrg %v0, 0, 1 # encoding: [0xe6,0x00,0x00,0x00,0x10,0x02] +#CHECK: vlebrg %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x02] +#CHECK: vlebrg %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x02] +#CHECK: vlebrg %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x02] +#CHECK: vlebrg %v15, 0, 0 # 
encoding: [0xe6,0xf0,0x00,0x00,0x00,0x02] +#CHECK: vlebrg %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x02] +#CHECK: vlebrg %v18, 1383(%r3,%r4), 1 # encoding: [0xe6,0x23,0x45,0x67,0x18,0x02] + + vlebrg %v0, 0, 0 + vlebrg %v0, 0, 1 + vlebrg %v0, 4095, 0 + vlebrg %v0, 0(%r15), 0 + vlebrg %v0, 0(%r15,%r1), 0 + vlebrg %v15, 0, 0 + vlebrg %v31, 0, 0 + vlebrg %v18, 1383(%r3,%r4), 1 + +#CHECK: vlebrh %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x01] +#CHECK: vlebrh %v0, 0, 7 # encoding: [0xe6,0x00,0x00,0x00,0x70,0x01] +#CHECK: vlebrh %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x01] +#CHECK: vlebrh %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x01] +#CHECK: vlebrh %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x01] +#CHECK: vlebrh %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x01] +#CHECK: vlebrh %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x01] +#CHECK: vlebrh %v18, 1383(%r3,%r4), 4 # encoding: [0xe6,0x23,0x45,0x67,0x48,0x01] + + vlebrh %v0, 0, 0 + vlebrh %v0, 0, 7 + vlebrh %v0, 4095, 0 + vlebrh %v0, 0(%r15), 0 + vlebrh %v0, 0(%r15,%r1), 0 + vlebrh %v15, 0, 0 + vlebrh %v31, 0, 0 + vlebrh %v18, 1383(%r3,%r4), 4 + +#CHECK: vler %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x07] +#CHECK: vler %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0x00,0xf0,0x07] +#CHECK: vler %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x07] +#CHECK: vler %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x07] +#CHECK: vler %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x07] +#CHECK: vler %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x07] +#CHECK: vler %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x07] +#CHECK: vler %v18, 1383(%r3,%r4), 11 # encoding: [0xe6,0x23,0x45,0x67,0xb8,0x07] + + vler %v0, 0, 0 + vler %v0, 0, 15 + vler %v0, 4095, 0 + vler %v0, 0(%r15), 0 + vler %v0, 0(%r15,%r1), 0 + vler %v15, 0, 0 + vler %v31, 0, 0 + vler %v18, 0x567(%r3,%r4), 11 + +#CHECK: vlerf %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x20,0x07] +#CHECK: vlerf %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x20,0x07] +#CHECK: vlerf %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x20,0x07] +#CHECK: vlerf %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x20,0x07] +#CHECK: vlerf %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x20,0x07] +#CHECK: vlerf %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x28,0x07] +#CHECK: vlerf %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x28,0x07] + + vlerf %v0, 0 + vlerf %v0, 4095 + vlerf %v0, 0(%r15) + vlerf %v0, 0(%r15,%r1) + vlerf %v15, 0 + vlerf %v31, 0 + vlerf %v18, 0x567(%r3,%r4) + +#CHECK: vlerg %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x07] +#CHECK: vlerg %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x30,0x07] +#CHECK: vlerg %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x30,0x07] +#CHECK: vlerg %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x30,0x07] +#CHECK: vlerg %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x30,0x07] +#CHECK: vlerg %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x38,0x07] +#CHECK: vlerg %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x38,0x07] + + vlerg %v0, 0 + vlerg %v0, 4095 + vlerg %v0, 0(%r15) + vlerg %v0, 0(%r15,%r1) + vlerg %v15, 0 + vlerg %v31, 0 + vlerg %v18, 0x567(%r3,%r4) + +#CHECK: vlerh %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x10,0x07] +#CHECK: vlerh %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x10,0x07] +#CHECK: vlerh %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x10,0x07] +#CHECK: vlerh %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x10,0x07] +#CHECK: vlerh %v15, 0 # encoding: 
[0xe6,0xf0,0x00,0x00,0x10,0x07]
+#CHECK: vlerh %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x18,0x07]
+#CHECK: vlerh %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x18,0x07]
+
+ vlerh %v0, 0
+ vlerh %v0, 4095
+ vlerh %v0, 0(%r15)
+ vlerh %v0, 0(%r15,%r1)
+ vlerh %v15, 0
+ vlerh %v31, 0
+ vlerh %v18, 0x567(%r3,%r4)
+
+#CHECK: vllebrz %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x04]
+#CHECK: vllebrz %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0x00,0xf0,0x04]
+#CHECK: vllebrz %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x04]
+#CHECK: vllebrz %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x04]
+#CHECK: vllebrz %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x04]
+#CHECK: vllebrz %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x04]
+#CHECK: vllebrz %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x04]
+#CHECK: vllebrz %v18, 1383(%r3,%r4), 11 # encoding: [0xe6,0x23,0x45,0x67,0xb8,0x04]
+
+ vllebrz %v0, 0, 0
+ vllebrz %v0, 0, 15
+ vllebrz %v0, 4095, 0
+ vllebrz %v0, 0(%r15), 0
+ vllebrz %v0, 0(%r15,%r1), 0
+ vllebrz %v15, 0, 0
+ vllebrz %v31, 0, 0
+ vllebrz %v18, 0x567(%r3,%r4), 11
+
+#CHECK: vllebrze %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x60,0x04]
+#CHECK: vllebrze %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x60,0x04]
+#CHECK: vllebrze %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x60,0x04]
+#CHECK: vllebrze %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x60,0x04]
+#CHECK: vllebrze %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x60,0x04]
+#CHECK: vllebrze %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x68,0x04]
+#CHECK: vllebrze %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x68,0x04]
+
+ vllebrze %v0, 0
+ vllebrze %v0, 4095
+ vllebrze %v0, 0(%r15)
+ vllebrze %v0, 0(%r15,%r1)
+ vllebrze %v15, 0
+ vllebrze %v31, 0
+ vllebrze %v18, 0x567(%r3,%r4)
+
+#CHECK: vllebrzf %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x20,0x04]
+#CHECK: vllebrzf %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x20,0x04]
+#CHECK: vllebrzf %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x20,0x04]
+#CHECK: vllebrzf %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x20,0x04]
+#CHECK: vllebrzf %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x20,0x04]
+#CHECK: vllebrzf %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x28,0x04]
+#CHECK: vllebrzf %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x28,0x04]
+
+ vllebrzf %v0, 0
+ vllebrzf %v0, 4095
+ vllebrzf %v0, 0(%r15)
+ vllebrzf %v0, 0(%r15,%r1)
+ vllebrzf %v15, 0
+ vllebrzf %v31, 0
+ vllebrzf %v18, 0x567(%r3,%r4)
+
+#CHECK: vllebrzg %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x04]
+#CHECK: vllebrzg %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x30,0x04]
+#CHECK: vllebrzg %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x30,0x04]
+#CHECK: vllebrzg %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x30,0x04]
+#CHECK: vllebrzg %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x30,0x04]
+#CHECK: vllebrzg %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x38,0x04]
+#CHECK: vllebrzg %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x38,0x04]
+
+ vllebrzg %v0, 0
+ vllebrzg %v0, 4095
+ vllebrzg %v0, 0(%r15)
+ vllebrzg %v0, 0(%r15,%r1)
+ vllebrzg %v15, 0
+ vllebrzg %v31, 0
+ vllebrzg %v18, 0x567(%r3,%r4)
+
+#CHECK: vllebrzh %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x10,0x04]
+#CHECK: vllebrzh %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x10,0x04]
+#CHECK: vllebrzh %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x10,0x04]
+#CHECK: vllebrzh %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x10,0x04]
+#CHECK: vllebrzh %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x10,0x04]
+#CHECK: vllebrzh %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x18,0x04]
+#CHECK: vllebrzh %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x18,0x04]
+
+ vllebrzh %v0, 0
+ vllebrzh %v0, 4095
+ vllebrzh %v0, 0(%r15)
+ vllebrzh %v0, 0(%r15,%r1)
+ vllebrzh %v15, 0
+ vllebrzh %v31, 0
+ vllebrzh %v18, 0x567(%r3,%r4)
+
+#CHECK: vsld %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x86]
+#CHECK: vsld %v0, %v0, %v0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x00,0x86]
+#CHECK: vsld %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x86]
+#CHECK: vsld %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x86]
+#CHECK: vsld %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x86]
+#CHECK: vsld %v13, %v17, %v21, 121 # encoding: [0xe7,0xd1,0x50,0x79,0x06,0x86]
+
+ vsld %v0, %v0, %v0, 0
+ vsld %v0, %v0, %v0, 255
+ vsld %v0, %v0, %v31, 0
+ vsld %v0, %v31, %v0, 0
+ vsld %v31, %v0, %v0, 0
+ vsld %v13, %v17, %v21, 0x79
+
+#CHECK: vsrd %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x87]
+#CHECK: vsrd %v0, %v0, %v0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x00,0x87]
+#CHECK: vsrd %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x87]
+#CHECK: vsrd %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x87]
+#CHECK: vsrd %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x87]
+#CHECK: vsrd %v13, %v17, %v21, 121 # encoding: [0xe7,0xd1,0x50,0x79,0x06,0x87]
+
+ vsrd %v0, %v0, %v0, 0
+ vsrd %v0, %v0, %v0, 255
+ vsrd %v0, %v0, %v31, 0
+ vsrd %v0, %v31, %v0, 0
+ vsrd %v31, %v0, %v0, 0
+ vsrd %v13, %v17, %v21, 0x79
+
+#CHECK: vstbr %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x0e]
+#CHECK: vstbr %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0x00,0xf0,0x0e]
+#CHECK: vstbr %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x0e]
+#CHECK: vstbr %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x0e]
+#CHECK: vstbr %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x0e]
+#CHECK: vstbr %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x0e]
+#CHECK: vstbr %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x0e]
+#CHECK: vstbr %v18, 1383(%r3,%r4), 11 # encoding: [0xe6,0x23,0x45,0x67,0xb8,0x0e]
+
+ vstbr %v0, 0, 0
+ vstbr %v0, 0, 15
+ vstbr %v0, 4095, 0
+ vstbr %v0, 0(%r15), 0
+ vstbr %v0, 0(%r15,%r1), 0
+ vstbr %v15, 0, 0
+ vstbr %v31, 0, 0
+ vstbr %v18, 0x567(%r3,%r4), 11
+
+#CHECK: vstbrf %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x20,0x0e]
+#CHECK: vstbrf %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x20,0x0e]
+#CHECK: vstbrf %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x20,0x0e]
+#CHECK: vstbrf %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x20,0x0e]
+#CHECK: vstbrf %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x20,0x0e]
+#CHECK: vstbrf %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x28,0x0e]
+#CHECK: vstbrf %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x28,0x0e]
+
+ vstbrf %v0, 0
+ vstbrf %v0, 4095
+ vstbrf %v0, 0(%r15)
+ vstbrf %v0, 0(%r15,%r1)
+ vstbrf %v15, 0
+ vstbrf %v31, 0
+ vstbrf %v18, 0x567(%r3,%r4)
+
+#CHECK: vstbrg %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x0e]
+#CHECK: vstbrg %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x30,0x0e]
+#CHECK: vstbrg %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x30,0x0e]
+#CHECK: vstbrg %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x30,0x0e]
+#CHECK: vstbrg %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x30,0x0e]
+#CHECK: vstbrg %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x38,0x0e]
+#CHECK: vstbrg %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x38,0x0e]
+
+ vstbrg %v0, 0
+ vstbrg %v0, 4095
+ vstbrg %v0, 0(%r15)
+ vstbrg %v0, 0(%r15,%r1)
+ vstbrg %v15, 0
+ vstbrg %v31, 0
+ vstbrg %v18, 0x567(%r3,%r4)
+
+#CHECK: vstbrh %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x10,0x0e]
+#CHECK: vstbrh %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x10,0x0e]
+#CHECK: vstbrh %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x10,0x0e]
+#CHECK: vstbrh %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x10,0x0e]
+#CHECK: vstbrh %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x10,0x0e]
+#CHECK: vstbrh %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x18,0x0e]
+#CHECK: vstbrh %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x18,0x0e]
+
+ vstbrh %v0, 0
+ vstbrh %v0, 4095
+ vstbrh %v0, 0(%r15)
+ vstbrh %v0, 0(%r15,%r1)
+ vstbrh %v15, 0
+ vstbrh %v31, 0
+ vstbrh %v18, 0x567(%r3,%r4)
+
+#CHECK: vstbrq %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x40,0x0e]
+#CHECK: vstbrq %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x40,0x0e]
+#CHECK: vstbrq %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x40,0x0e]
+#CHECK: vstbrq %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x40,0x0e]
+#CHECK: vstbrq %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x40,0x0e]
+#CHECK: vstbrq %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x48,0x0e]
+#CHECK: vstbrq %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x48,0x0e]
+
+ vstbrq %v0, 0
+ vstbrq %v0, 4095
+ vstbrq %v0, 0(%r15)
+ vstbrq %v0, 0(%r15,%r1)
+ vstbrq %v15, 0
+ vstbrq %v31, 0
+ vstbrq %v18, 0x567(%r3,%r4)
+
+#CHECK: vstebrf %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x0b]
+#CHECK: vstebrf %v0, 0, 3 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x0b]
+#CHECK: vstebrf %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x0b]
+#CHECK: vstebrf %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x0b]
+#CHECK: vstebrf %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x0b]
+#CHECK: vstebrf %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x0b]
+#CHECK: vstebrf %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x0b]
+#CHECK: vstebrf %v18, 1383(%r3,%r4), 2 # encoding: [0xe6,0x23,0x45,0x67,0x28,0x0b]
+
+ vstebrf %v0, 0, 0
+ vstebrf %v0, 0, 3
+ vstebrf %v0, 4095, 0
+ vstebrf %v0, 0(%r15), 0
+ vstebrf %v0, 0(%r15,%r1), 0
+ vstebrf %v15, 0, 0
+ vstebrf %v31, 0, 0
+ vstebrf %v18, 1383(%r3,%r4), 2
+
+#CHECK: vstebrg %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x0a]
+#CHECK: vstebrg %v0, 0, 1 # encoding: [0xe6,0x00,0x00,0x00,0x10,0x0a]
+#CHECK: vstebrg %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x0a]
+#CHECK: vstebrg %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x0a]
+#CHECK: vstebrg %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x0a]
+#CHECK: vstebrg %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x0a]
+#CHECK: vstebrg %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x0a]
+#CHECK: vstebrg %v18, 1383(%r3,%r4), 1 # encoding: [0xe6,0x23,0x45,0x67,0x18,0x0a]
+
+ vstebrg %v0, 0, 0
+ vstebrg %v0, 0, 1
+ vstebrg %v0, 4095, 0
+ vstebrg %v0, 0(%r15), 0
+ vstebrg %v0, 0(%r15,%r1), 0
+ vstebrg %v15, 0, 0
+ vstebrg %v31, 0, 0
+ vstebrg %v18, 1383(%r3,%r4), 1
+
+#CHECK: vstebrh %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x09]
+#CHECK: vstebrh %v0, 0, 7 # encoding: [0xe6,0x00,0x00,0x00,0x70,0x09]
+#CHECK: vstebrh %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x09]
+#CHECK: vstebrh %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x09]
+#CHECK: vstebrh %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x09]
+#CHECK: vstebrh %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x09]
+#CHECK: vstebrh %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x09]
+#CHECK: vstebrh %v18, 1383(%r3,%r4), 4 # encoding: [0xe6,0x23,0x45,0x67,0x48,0x09]
+
+ vstebrh %v0, 0, 0
+ vstebrh %v0, 0, 7
+ vstebrh %v0, 4095, 0
+ vstebrh %v0, 0(%r15), 0
+ vstebrh %v0, 0(%r15,%r1), 0
+ vstebrh %v15, 0, 0
+ vstebrh %v31, 0, 0
+ vstebrh %v18, 1383(%r3,%r4), 4
+
+#CHECK: vster %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x0f]
+#CHECK: vster %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0x00,0xf0,0x0f]
+#CHECK: vster %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x0f]
+#CHECK: vster %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x0f]
+#CHECK: vster %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x0f]
+#CHECK: vster %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x0f]
+#CHECK: vster %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x0f]
+#CHECK: vster %v18, 1383(%r3,%r4), 11 # encoding: [0xe6,0x23,0x45,0x67,0xb8,0x0f]
+
+ vster %v0, 0, 0
+ vster %v0, 0, 15
+ vster %v0, 4095, 0
+ vster %v0, 0(%r15), 0
+ vster %v0, 0(%r15,%r1), 0
+ vster %v15, 0, 0
+ vster %v31, 0, 0
+ vster %v18, 0x567(%r3,%r4), 11
+
+#CHECK: vsterf %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x20,0x0f]
+#CHECK: vsterf %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x20,0x0f]
+#CHECK: vsterf %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x20,0x0f]
+#CHECK: vsterf %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x20,0x0f]
+#CHECK: vsterf %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x20,0x0f]
+#CHECK: vsterf %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x28,0x0f]
+#CHECK: vsterf %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x28,0x0f]
+
+ vsterf %v0, 0
+ vsterf %v0, 4095
+ vsterf %v0, 0(%r15)
+ vsterf %v0, 0(%r15,%r1)
+ vsterf %v15, 0
+ vsterf %v31, 0
+ vsterf %v18, 0x567(%r3,%r4)
+
+#CHECK: vsterg %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x0f]
+#CHECK: vsterg %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x30,0x0f]
+#CHECK: vsterg %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x30,0x0f]
+#CHECK: vsterg %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x30,0x0f]
+#CHECK: vsterg %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x30,0x0f]
+#CHECK: vsterg %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x38,0x0f]
+#CHECK: vsterg %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x38,0x0f]
+
+ vsterg %v0, 0
+ vsterg %v0, 4095
+ vsterg %v0, 0(%r15)
+ vsterg %v0, 0(%r15,%r1)
+ vsterg %v15, 0
+ vsterg %v31, 0
+ vsterg %v18, 0x567(%r3,%r4)
+
+#CHECK: vsterh %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x10,0x0f]
+#CHECK: vsterh %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x10,0x0f]
+#CHECK: vsterh %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x10,0x0f]
+#CHECK: vsterh %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x10,0x0f]
+#CHECK: vsterh %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x10,0x0f]
+#CHECK: vsterh %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x18,0x0f]
+#CHECK: vsterh %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x18,0x0f]
+
+ vsterh %v0, 0
+ vsterh %v0, 4095
+ vsterh %v0, 0(%r15)
+ vsterh %v0, 0(%r15,%r1)
+ vsterh %v15, 0
+ vsterh %v31, 0
+ vsterh %v18, 0x567(%r3,%r4)
+
+#CHECK: vstrs %v0, %v0, %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8b]
+#CHECK: vstrs %v0, %v0, %v0, %v0, 15, 0 # encoding: [0xe7,0x00,0x0f,0x00,0x00,0x8b]
+#CHECK: vstrs %v0, %v0, %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8b]
+#CHECK: vstrs %v0, %v0, %v0, %v0, 15, 0 # encoding: [0xe7,0x00,0x0f,0x00,0x00,0x8b]
+#CHECK: vstrs %v0, %v0, %v0, %v0, 0, 12 # encoding: [0xe7,0x00,0x00,0xc0,0x00,0x8b]
+#CHECK: vstrs %v0, %v0, %v0, %v15, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x8b]
+#CHECK: vstrs %v0, %v0, %v0, %v31, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0x8b]
+#CHECK: vstrs %v0, %v0, %v15, %v0, 0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x8b]
+#CHECK: vstrs %v0, %v0, %v31, %v0, 0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x8b]
+#CHECK: vstrs %v0, %v15, %v0, %v0, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x8b]
+#CHECK: vstrs %v0, %v31, %v0, %v0, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x8b]
+#CHECK: vstrs %v15, %v0, %v0, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x8b]
+#CHECK: vstrs %v31, %v0, %v0, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x8b]
+#CHECK: vstrs %v18, %v3, %v20, %v5, 11, 4 # encoding: [0xe7,0x23,0x4b,0x40,0x5a,0x8b]
+#CHECK: vstrs %v18, %v3, %v20, %v5, 0, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x5a,0x8b]
+
+ vstrs %v0, %v0, %v0, %v0, 0
+ vstrs %v0, %v0, %v0, %v0, 15
+ vstrs %v0, %v0, %v0, %v0, 0, 0
+ vstrs %v0, %v0, %v0, %v0, 15, 0
+ vstrs %v0, %v0, %v0, %v0, 0, 12
+ vstrs %v0, %v0, %v0, %v15, 0
+ vstrs %v0, %v0, %v0, %v31, 0
+ vstrs %v0, %v0, %v15, %v0, 0
+ vstrs %v0, %v0, %v31, %v0, 0
+ vstrs %v0, %v15, %v0, %v0, 0
+ vstrs %v0, %v31, %v0, %v0, 0
+ vstrs %v15, %v0, %v0, %v0, 0
+ vstrs %v31, %v0, %v0, %v0, 0
+ vstrs %v18, %v3, %v20, %v5, 11, 4
+ vstrs %v18, %v3, %v20, %v5, 0, 15
+
+#CHECK: vstrsb %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8b]
+#CHECK: vstrsb %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8b]
+#CHECK: vstrsb %v0, %v0, %v0, %v0, 12 # encoding: [0xe7,0x00,0x00,0xc0,0x00,0x8b]
+#CHECK: vstrsb %v0, %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x8b]
+#CHECK: vstrsb %v0, %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0x8b]
+#CHECK: vstrsb %v0, %v0, %v15, %v0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x8b]
+#CHECK: vstrsb %v0, %v0, %v31, %v0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x8b]
+#CHECK: vstrsb %v0, %v15, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x8b]
+#CHECK: vstrsb %v0, %v31, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x8b]
+#CHECK: vstrsb %v15, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x8b]
+#CHECK: vstrsb %v31, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x8b]
+#CHECK: vstrsb %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x40,0x40,0x5a,0x8b]
+#CHECK: vstrsb %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x5a,0x8b]
+#CHECK: vstrszb %v18, %v3, %v20, %v5 # encoding: [0xe7,0x23,0x40,0x20,0x5a,0x8b]
+
+ vstrsb %v0, %v0, %v0, %v0
+ vstrsb %v0, %v0, %v0, %v0, 0
+ vstrsb %v0, %v0, %v0, %v0, 12
+ vstrsb %v0, %v0, %v0, %v15
+ vstrsb %v0, %v0, %v0, %v31
+ vstrsb %v0, %v0, %v15, %v0
+ vstrsb %v0, %v0, %v31, %v0
+ vstrsb %v0, %v15, %v0, %v0
+ vstrsb %v0, %v31, %v0, %v0
+ vstrsb %v15, %v0, %v0, %v0
+ vstrsb %v31, %v0, %v0, %v0
+ vstrsb %v18, %v3, %v20, %v5, 4
+ vstrsb %v18, %v3, %v20, %v5, 15
+ vstrszb %v18, %v3, %v20, %v5
+
+#CHECK: vstrsf %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0x8b]
+#CHECK: vstrsf %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0x8b]
+#CHECK: vstrsf %v0, %v0, %v0, %v0, 12 # encoding: [0xe7,0x00,0x02,0xc0,0x00,0x8b]
+#CHECK: vstrsf %v0, %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0x02,0x00,0xf0,0x8b]
+#CHECK: vstrsf %v0, %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0x8b]
+#CHECK: vstrsf %v0, %v0, %v15, %v0, 0 # encoding: [0xe7,0x00,0xf2,0x00,0x00,0x8b]
+#CHECK: vstrsf %v0, %v0, %v31, %v0, 0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0x8b]
+#CHECK: vstrsf %v0, %v15, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x02,0x00,0x00,0x8b]
+#CHECK: vstrsf %v0, %v31, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0x8b]
+#CHECK: vstrsf %v15, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x02,0x00,0x00,0x8b]
+#CHECK: vstrsf %v31, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0x8b]
+#CHECK: vstrsf %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x42,0x40,0x5a,0x8b]
+#CHECK: vstrsf %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x42,0xf0,0x5a,0x8b]
+#CHECK: vstrszf %v18, %v3, %v20, %v5 # encoding: [0xe7,0x23,0x42,0x20,0x5a,0x8b]
+
+ vstrsf %v0, %v0, %v0, %v0
+ vstrsf %v0, %v0, %v0, %v0, 0
+ vstrsf %v0, %v0, %v0, %v0, 12
+ vstrsf %v0, %v0, %v0, %v15
+ vstrsf %v0, %v0, %v0, %v31
+ vstrsf %v0, %v0, %v15, %v0
+ vstrsf %v0, %v0, %v31, %v0
+ vstrsf %v0, %v15, %v0, %v0
+ vstrsf %v0, %v31, %v0, %v0
+ vstrsf %v15, %v0, %v0, %v0
+ vstrsf %v31, %v0, %v0, %v0
+ vstrsf %v18, %v3, %v20, %v5, 4
+ vstrsf %v18, %v3, %v20, %v5, 15
+ vstrszf %v18, %v3, %v20, %v5
+
+#CHECK: vstrsh %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0x8b]
+#CHECK: vstrsh %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0x8b]
+#CHECK: vstrsh %v0, %v0, %v0, %v0, 12 # encoding: [0xe7,0x00,0x01,0xc0,0x00,0x8b]
+#CHECK: vstrsh %v0, %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0x01,0x00,0xf0,0x8b]
+#CHECK: vstrsh %v0, %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0x8b]
+#CHECK: vstrsh %v0, %v0, %v15, %v0, 0 # encoding: [0xe7,0x00,0xf1,0x00,0x00,0x8b]
+#CHECK: vstrsh %v0, %v0, %v31, %v0, 0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0x8b]
+#CHECK: vstrsh %v0, %v15, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x01,0x00,0x00,0x8b]
+#CHECK: vstrsh %v0, %v31, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0x8b]
+#CHECK: vstrsh %v15, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x01,0x00,0x00,0x8b]
+#CHECK: vstrsh %v31, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0x8b]
+#CHECK: vstrsh %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x41,0x40,0x5a,0x8b]
+#CHECK: vstrsh %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x41,0xf0,0x5a,0x8b]
+#CHECK: vstrszh %v18, %v3, %v20, %v5 # encoding: [0xe7,0x23,0x41,0x20,0x5a,0x8b]
+
+ vstrsh %v0, %v0, %v0, %v0
+ vstrsh %v0, %v0, %v0, %v0, 0
+ vstrsh %v0, %v0, %v0, %v0, 12
+ vstrsh %v0, %v0, %v0, %v15
+ vstrsh %v0, %v0, %v0, %v31
+ vstrsh %v0, %v0, %v15, %v0
+ vstrsh %v0, %v0, %v31, %v0
+ vstrsh %v0, %v15, %v0, %v0
+ vstrsh %v0, %v31, %v0, %v0
+ vstrsh %v15, %v0, %v0, %v0
+ vstrsh %v31, %v0, %v0, %v0
+ vstrsh %v18, %v3, %v20, %v5, 4
+ vstrsh %v18, %v3, %v20, %v5, 15
+ vstrszh %v18, %v3, %v20, %v5
+
+#CHECK: wcefb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc3]
+#CHECK: wcefb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc3]
+#CHECK: wcefb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x20,0xc3]
+#CHECK: wcefb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc3]
+#CHECK: wcefb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc3]
+#CHECK: wcefb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xc3]
+#CHECK: wcefb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xc3]
+#CHECK: wcefb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x24,0xc3]
+
+ wcefb %v0, %v0, 0, 0
+ wcefb %f0, %f0, 0, 0
+ wcefb %v0, %v0, 0, 15
+ wcefb %v0, %v0, 4, 0
+ wcefb %v0, %v0, 12, 0
+ wcefb %v0, %v31, 0, 0
+ wcefb %v31, %v0, 0, 0
+ wcefb %v14, %v17, 4, 10
+
+#CHECK: wcelfb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc1]
+#CHECK: wcelfb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc1]
+#CHECK: wcelfb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x20,0xc1]
+#CHECK: wcelfb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc1]
+#CHECK: wcelfb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc1]
+#CHECK: wcelfb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xc1]
+#CHECK: wcelfb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xc1]
+#CHECK: wcelfb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x24,0xc1]
+
+ wcelfb %v0, %v0, 0, 0
+ wcelfb %f0, %f0, 0, 0
+ wcelfb %v0, %v0, 0, 15
+ wcelfb %v0, %v0, 4, 0
+ wcelfb %v0, %v0, 12, 0
+ wcelfb %v0, %v31, 0, 0
+ wcelfb %v31, %v0, 0, 0
+ wcelfb %v14, %v17, 4, 10
+
+#CHECK: wcfeb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc2]
+#CHECK: wcfeb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc2]
+#CHECK: wcfeb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x20,0xc2]
+#CHECK: wcfeb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc2]
+#CHECK: wcfeb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc2]
+#CHECK: wcfeb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xc2]
+#CHECK: wcfeb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xc2]
+#CHECK: wcfeb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x24,0xc2]
+
+ wcfeb %v0, %v0, 0, 0
+ wcfeb %f0, %f0, 0, 0
+ wcfeb %v0, %v0, 0, 15
+ wcfeb %v0, %v0, 4, 0
+ wcfeb %v0, %v0, 12, 0
+ wcfeb %v0, %v31, 0, 0
+ wcfeb %v31, %v0, 0, 0
+ wcfeb %v14, %v17, 4, 10
+
+#CHECK: wclfeb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc0]
+#CHECK: wclfeb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc0]
+#CHECK: wclfeb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x20,0xc0]
+#CHECK: wclfeb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc0]
+#CHECK: wclfeb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc0]
+#CHECK: wclfeb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xc0]
+#CHECK: wclfeb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xc0]
+#CHECK: wclfeb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x24,0xc0]
+
+ wclfeb %v0, %v0, 0, 0
+ wclfeb %f0, %f0, 0, 0
+ wclfeb %v0, %v0, 0, 15
+ wclfeb %v0, %v0, 4, 0
+ wclfeb %v0, %v0, 12, 0
+ wclfeb %v0, %v31, 0, 0
+ wclfeb %v31, %v0, 0, 0
+ wclfeb %v14, %v17, 4, 10
+
diff --git a/llvm/test/Verifier/SystemZ/intrinsic-immarg.ll b/llvm/test/Verifier/SystemZ/intrinsic-immarg.ll
index 72cbae7507d..eaf308f7b22 100644
--- a/llvm/test/Verifier/SystemZ/intrinsic-immarg.ll
+++ b/llvm/test/Verifier/SystemZ/intrinsic-immarg.ll
@@ -381,3 +381,21 @@ define <16 x i8> @test_vsldb(<16 x i8> %a, <16 x i8> %b, i32 %c) {
   ret <16 x i8> %res
 }
 
+declare <16 x i8> @llvm.s390.vsld(<16 x i8>, <16 x i8>, i32)
+define <16 x i8> @test_vsld(<16 x i8> %a, <16 x i8> %b, i32 %c) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %c
+  ; CHECK-NEXT: %res = call <16 x i8> @llvm.s390.vsld(<16 x i8> %a, <16 x i8> %b, i32 %c)
+  %res = call <16 x i8> @llvm.s390.vsld(<16 x i8> %a, <16 x i8> %b, i32 %c)
+  ret <16 x i8> %res
+}
+
+declare <16 x i8> @llvm.s390.vsrd(<16 x i8>, <16 x i8>, i32)
+define <16 x i8> @test_vsrd(<16 x i8> %a, <16 x i8> %b, i32 %c) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %c
+  ; CHECK-NEXT: %res = call <16 x i8> @llvm.s390.vsrd(<16 x i8> %a, <16 x i8> %b, i32 %c)
+  %res = call <16 x i8> @llvm.s390.vsrd(<16 x i8> %a, <16 x i8> %b, i32 %c)
+  ret <16 x i8> %res
+}
+