diff options
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 61 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 247 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.h | 25 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZInstrVector.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZOperators.td | 1 | ||||
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/fp-const-11.ll | 30 | ||||
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/vec-const-05.ll | 57 | ||||
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/vec-const-06.ll | 40 | ||||
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/vec-const-19.ll | 18 | 
9 files changed, 299 insertions, 182 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index a963638be98..6c30057e5e2 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -304,6 +304,9 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {    void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,                             uint64_t UpperVal, uint64_t LowerVal); +  void loadVectorConstant(const SystemZVectorConstantInfo &VCI, +                          SDNode *Node); +    // Try to use gather instruction Opcode to implement vector insertion N.    bool tryGather(SDNode *N, unsigned Opcode); @@ -1132,6 +1135,35 @@ void SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,    SelectCode(Or.getNode());  } +void SystemZDAGToDAGISel::loadVectorConstant( +    const SystemZVectorConstantInfo &VCI, SDNode *Node) { +  assert((VCI.Opcode == SystemZISD::BYTE_MASK || +          VCI.Opcode == SystemZISD::REPLICATE || +          VCI.Opcode == SystemZISD::ROTATE_MASK) && +         "Bad opcode!"); +  assert(VCI.VecVT.getSizeInBits() == 128 && "Expected a vector type"); +  EVT VT = Node->getValueType(0); +  SDLoc DL(Node); +  SmallVector<SDValue, 2> Ops; +  for (unsigned OpVal : VCI.OpVals) +    Ops.push_back(CurDAG->getConstant(OpVal, DL, MVT::i32)); +  SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops); + +  if (VCI.VecVT == VT.getSimpleVT()) +    ReplaceNode(Node, Op.getNode()); +  else if (VT.getSizeInBits() == 128) { +    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, DL, VT, Op); +    ReplaceNode(Node, BitCast.getNode()); +    SelectCode(BitCast.getNode()); +  } else { // float or double +    unsigned SubRegIdx = +        (VT.getSizeInBits() == 32 ? 
SystemZ::subreg_h32 : SystemZ::subreg_h64); +    ReplaceNode( +        Node, CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, Op).getNode()); +  } +  SelectCode(Op.getNode()); +} +  bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {    SDValue ElemV = N->getOperand(2);    auto *ElemN = dyn_cast<ConstantSDNode>(ElemV); @@ -1529,13 +1561,9 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {    case ISD::BUILD_VECTOR: {      auto *BVN = cast<BuildVectorSDNode>(Node); -    SDLoc DL(Node); -    EVT VT = Node->getValueType(0); -    uint64_t Mask = 0; -    if (SystemZTargetLowering::tryBuildVectorByteMask(BVN, Mask)) { -      SDNode *Res = CurDAG->getMachineNode(SystemZ::VGBM, DL, VT, -                                CurDAG->getTargetConstant(Mask, DL, MVT::i32)); -      ReplaceNode(Node, Res); +    SystemZVectorConstantInfo VCI(BVN); +    if (VCI.isVectorConstantLegal(*Subtarget)) { +      loadVectorConstant(VCI, Node);        return;      }      break; @@ -1545,23 +1573,10 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {      APFloat Imm = cast<ConstantFPSDNode>(Node)->getValueAPF();      if (Imm.isZero() || Imm.isNegZero())        break; -    const SystemZInstrInfo *TII = getInstrInfo(); -    EVT VT = Node->getValueType(0); -    unsigned Start, End; -    unsigned BitWidth = VT.getSizeInBits(); -    bool Success = SystemZTargetLowering::analyzeFPImm(Imm, BitWidth, Start, -              End, static_cast<const SystemZInstrInfo *>(TII)); (void)Success; +    SystemZVectorConstantInfo VCI(Imm); +    bool Success = VCI.isVectorConstantLegal(*Subtarget); (void)Success;      assert(Success && "Expected legal FP immediate"); -    SDLoc DL(Node); -    unsigned Opcode = (BitWidth == 32 ? 
SystemZ::VGMF : SystemZ::VGMG); -    SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, -                            CurDAG->getTargetConstant(Start, DL, MVT::i32), -                            CurDAG->getTargetConstant(End, DL, MVT::i32)); -    unsigned SubRegIdx = (BitWidth == 32 ? SystemZ::subreg_h32 -                                         : SystemZ::subreg_h64); -    Res = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SDValue(Res, 0)) -            .getNode(); -    ReplaceNode(Node, Res); +    loadVectorConstant(VCI, Node);      return;    } diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 310fd41354e..7012d9a243f 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -577,26 +577,118 @@ bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {    return false;  } +// Return true if the constant can be generated with a vector instruction, +// such as VGM, VGBM or VREPI. +bool SystemZVectorConstantInfo::isVectorConstantLegal( +    const SystemZSubtarget &Subtarget) { +  const SystemZInstrInfo *TII = +      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); +  if (!Subtarget.hasVector() || +      (isFP128 && !Subtarget.hasVectorEnhancements1())) +    return false; -// Return true if Imm can be generated with a vector instruction, such as VGM. -bool SystemZTargetLowering:: -analyzeFPImm(const APFloat &Imm, unsigned BitWidth, unsigned &Start, -             unsigned &End, const SystemZInstrInfo *TII) { -  APInt IntImm = Imm.bitcastToAPInt(); -  if (IntImm.getActiveBits() > 64) +  // Try using VECTOR GENERATE BYTE MASK.  This is the architecturally- +  // preferred way of creating all-zero and all-one vectors so give it +  // priority over other methods below. 
+  unsigned Mask = 0; +  unsigned I = 0; +  for (; I < SystemZ::VectorBytes; ++I) { +    uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue(); +    if (Byte == 0xff) +      Mask |= 1ULL << I; +    else if (Byte != 0) +      break; +  } +  if (I == SystemZ::VectorBytes) { +    Opcode = SystemZISD::BYTE_MASK; +    OpVals.push_back(Mask); +    VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16); +    return true; +  } + +  if (SplatBitSize > 64)      return false; -  // See if this immediate could be generated with VGM. -  bool Success = TII->isRxSBGMask(IntImm.getZExtValue(), BitWidth, Start, End); -  if (!Success) +  auto tryValue = [&](uint64_t Value) -> bool { +    // Try VECTOR REPLICATE IMMEDIATE +    int64_t SignedValue = SignExtend64(Value, SplatBitSize); +    if (isInt<16>(SignedValue)) { +      OpVals.push_back(((unsigned) SignedValue)); +      Opcode = SystemZISD::REPLICATE; +      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), +                               SystemZ::VectorBits / SplatBitSize); +      return true; +    } +    // Try VECTOR GENERATE MASK +    unsigned Start, End; +    if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) { +      // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0 +      // denoting 1 << 63 and 63 denoting 1.  Convert them to bit numbers for +      // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1). +      OpVals.push_back(Start - (64 - SplatBitSize)); +      OpVals.push_back(End - (64 - SplatBitSize)); +      Opcode = SystemZISD::ROTATE_MASK; +      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), +                               SystemZ::VectorBits / SplatBitSize); +      return true; +    }      return false; -  // isRxSBGMask returns the bit numbers for a full 64-bit value, -  // with 0 denoting 1 << 63 and 63 denoting 1.  Convert them to -  // bit numbers for an BitsPerElement value, so that 0 denotes -  // 1 << (BitsPerElement-1). 
-  Start -= 64 - BitWidth; -  End -= 64 - BitWidth; -  return true; +  }; + +  // First try assuming that any undefined bits above the highest set bit +  // and below the lowest set bit are 1s.  This increases the likelihood of +  // being able to use a sign-extended element value in VECTOR REPLICATE +  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. +  uint64_t SplatBitsZ = SplatBits.getZExtValue(); +  uint64_t SplatUndefZ = SplatUndef.getZExtValue(); +  uint64_t Lower = +      (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1)); +  uint64_t Upper = +      (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1)); +  if (tryValue(SplatBitsZ | Upper | Lower)) +    return true; + +  // Now try assuming that any undefined bits between the first and +  // last defined set bits are set.  This increases the chances of +  // using a non-wraparound mask. +  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; +  return tryValue(SplatBitsZ | Middle); +} + +SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) { +  IntBits = FPImm.bitcastToAPInt().zextOrSelf(128); +  isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad()); + +  // Find the smallest splat. +  SplatBits = FPImm.bitcastToAPInt(); +  unsigned Width = SplatBits.getBitWidth(); +  while (Width > 8) { +    unsigned HalfSize = Width / 2; +    APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize); +    APInt LowValue = SplatBits.trunc(HalfSize); + +    // If the two halves do not match, stop here. +    if (HighValue != LowValue || 8 > HalfSize) +      break; + +    SplatBits = HighValue; +    Width = HalfSize; +  } +  SplatUndef = 0; +  SplatBitSize = Width; +} + +SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) { +  assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR"); +  bool HasAnyUndefs; + +  // Get IntBits by finding the 128 bit splat. 
+  BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128, +                       true); + +  // Get SplatBits by finding the 8 bit or greater splat. +  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8, +                       true);  }  bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { @@ -604,12 +696,7 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {    if (Imm.isZero() || Imm.isNegZero())      return true; -  if (!Subtarget.hasVector()) -    return false; -  const SystemZInstrInfo *TII = -      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); -  unsigned Start, End; -  return analyzeFPImm(Imm, VT.getSizeInBits(), Start, End, TII); +  return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);  }  bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { @@ -4289,78 +4376,6 @@ static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,    return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);  } -// Try to represent constant BUILD_VECTOR node BVN using a BYTE MASK style -// mask.  Store the mask value in Mask on success. 
-bool SystemZTargetLowering:: -tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { -  EVT ElemVT = BVN->getValueType(0).getVectorElementType(); -  unsigned BytesPerElement = ElemVT.getStoreSize(); -  for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) { -    SDValue Op = BVN->getOperand(I); -    if (!Op.isUndef()) { -      uint64_t Value; -      if (Op.getOpcode() == ISD::Constant) -        Value = cast<ConstantSDNode>(Op)->getZExtValue(); -      else if (Op.getOpcode() == ISD::ConstantFP) -        Value = (cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt() -                 .getZExtValue()); -      else -        return false; -      for (unsigned J = 0; J < BytesPerElement; ++J) { -        uint64_t Byte = (Value >> (J * 8)) & 0xff; -        if (Byte == 0xff) -          Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J); -        else if (Byte != 0) -          return false; -      } -    } -  } -  return true; -} - -// Try to load a vector constant in which BitsPerElement-bit value Value -// is replicated to fill the vector.  VT is the type of the resulting -// constant, which may have elements of a different size from BitsPerElement. -// Return the SDValue of the constant on success, otherwise return -// an empty value. -static SDValue tryBuildVectorReplicate(SelectionDAG &DAG, -                                       const SystemZInstrInfo *TII, -                                       const SDLoc &DL, EVT VT, uint64_t Value, -                                       unsigned BitsPerElement) { -  // Signed 16-bit values can be replicated using VREPI. -  // Mark the constants as opaque or DAGCombiner will convert back to -  // BUILD_VECTOR. 
-  int64_t SignedValue = SignExtend64(Value, BitsPerElement); -  if (isInt<16>(SignedValue)) { -    MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), -                                 SystemZ::VectorBits / BitsPerElement); -    SDValue Op = DAG.getNode( -        SystemZISD::REPLICATE, DL, VecVT, -        DAG.getConstant(SignedValue, DL, MVT::i32, false, true /*isOpaque*/)); -    return DAG.getNode(ISD::BITCAST, DL, VT, Op); -  } -  // See whether rotating the constant left some N places gives a value that -  // is one less than a power of 2 (i.e. all zeros followed by all ones). -  // If so we can use VGM. -  unsigned Start, End; -  if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) { -    // isRxSBGMask returns the bit numbers for a full 64-bit value, -    // with 0 denoting 1 << 63 and 63 denoting 1.  Convert them to -    // bit numbers for an BitsPerElement value, so that 0 denotes -    // 1 << (BitsPerElement-1). -    Start -= 64 - BitsPerElement; -    End -= 64 - BitsPerElement; -    MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), -                                 SystemZ::VectorBits / BitsPerElement); -    SDValue Op = DAG.getNode( -        SystemZISD::ROTATE_MASK, DL, VecVT, -        DAG.getConstant(Start, DL, MVT::i32, false, true /*isOpaque*/), -        DAG.getConstant(End, DL, MVT::i32, false, true /*isOpaque*/)); -    return DAG.getNode(ISD::BITCAST, DL, VT, Op); -  } -  return SDValue(); -} -  // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually  // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for  // the non-EXTRACT_VECTOR_ELT elements.  
See if the given BUILD_VECTOR @@ -4561,55 +4576,14 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,  SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,                                                   SelectionDAG &DAG) const { -  const SystemZInstrInfo *TII = -    static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());    auto *BVN = cast<BuildVectorSDNode>(Op.getNode());    SDLoc DL(Op);    EVT VT = Op.getValueType();    if (BVN->isConstant()) { -    // Try using VECTOR GENERATE BYTE MASK.  This is the architecturally- -    // preferred way of creating all-zero and all-one vectors so give it -    // priority over other methods below. -    uint64_t Mask; -    if (ISD::isBuildVectorAllZeros(Op.getNode()) || -        ISD::isBuildVectorAllOnes(Op.getNode()) || -        (VT.isInteger() && tryBuildVectorByteMask(BVN, Mask))) +    if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))        return Op; -    // Try using some form of replication. -    APInt SplatBits, SplatUndef; -    unsigned SplatBitSize; -    bool HasAnyUndefs; -    if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, -                             8, true) && -        SplatBitSize <= 64) { -      // First try assuming that any undefined bits above the highest set bit -      // and below the lowest set bit are 1s.  This increases the likelihood of -      // being able to use a sign-extended element value in VECTOR REPLICATE -      // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. 
-      uint64_t SplatBitsZ = SplatBits.getZExtValue(); -      uint64_t SplatUndefZ = SplatUndef.getZExtValue(); -      uint64_t Lower = (SplatUndefZ -                        & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1)); -      uint64_t Upper = (SplatUndefZ -                        & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1)); -      uint64_t Value = SplatBitsZ | Upper | Lower; -      SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, -                                           SplatBitSize); -      if (Op.getNode()) -        return Op; - -      // Now try assuming that any undefined bits between the first and -      // last defined set bits are set.  This increases the chances of -      // using a non-wraparound mask. -      uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; -      Value = SplatBitsZ | Middle; -      Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize); -      if (Op.getNode()) -        return Op; -    } -      // Fall back to loading it from memory.      return SDValue();    } @@ -5055,6 +5029,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {      OPCODE(TBEGIN);      OPCODE(TBEGIN_NOFLOAT);      OPCODE(TEND); +    OPCODE(BYTE_MASK);      OPCODE(ROTATE_MASK);      OPCODE(REPLICATE);      OPCODE(JOIN_DWORDS); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 8efe2204581..b8dc4ab65f4 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -162,6 +162,10 @@ enum NodeType : unsigned {    // Transaction end.  Just the chain operand.  Returns CC value and chain.    TEND, +  // Create a vector constant by filling byte N of the result with bit +  // 15-N of the single operand. +  BYTE_MASK, +    // Create a vector constant by replicating an element-sized RISBG-style mask.    
// The first operand specifies the starting set bit and the second operand    // specifies the ending set bit.  Both operands count from the MSB of the @@ -513,9 +517,6 @@ public:      return true;    } -  static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask); -  static bool analyzeFPImm(const APFloat &Imm, unsigned BitWidth, -                 unsigned &Start, unsigned &End, const SystemZInstrInfo *TII);  private:    const SystemZSubtarget &Subtarget; @@ -643,6 +644,24 @@ private:    const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;  }; + +struct SystemZVectorConstantInfo { +private: +  APInt IntBits;             // The 128 bits as an integer. +  APInt SplatBits;           // Smallest splat value. +  APInt SplatUndef;          // Bits corresponding to undef operands of the BVN. +  unsigned SplatBitSize = 0; +  bool isFP128 = false; + +public: +  unsigned Opcode = 0; +  SmallVector<unsigned, 2> OpVals; +  MVT VecVT; +  SystemZVectorConstantInfo(APFloat FPImm); +  SystemZVectorConstantInfo(BuildVectorSDNode *BVN); +  bool isVectorConstantLegal(const SystemZSubtarget &Subtarget); +}; +  } // end namespace llvm  #endif diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index dd2a0d58cdf..82cca0b1217 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -60,7 +60,7 @@ let Predicates = [FeatureVector] in {      // Generate byte mask.      def VZERO : InherentVRIa<"vzero", 0xE744, 0>;      def VONE  : InherentVRIa<"vone", 0xE744, 0xffff>; -    def VGBM  : UnaryVRIa<"vgbm", 0xE744, null_frag, v128b, imm32zx16>; +    def VGBM  : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;      // Generate mask.      
def VGM  : BinaryVRIbGeneric<"vgm", 0xE746>; diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index 9914db8651c..876a8099d5f 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -286,6 +286,7 @@ def z_vector_insert     : SDNode<"ISD::INSERT_VECTOR_ELT",                                   SDT_ZInsertVectorElt>;  def z_vector_extract    : SDNode<"ISD::EXTRACT_VECTOR_ELT",                                   SDT_ZExtractVectorElt>; +def z_byte_mask         : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>;  def z_rotate_mask       : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>;  def z_replicate         : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>;  def z_join_dwords       : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>; diff --git a/llvm/test/CodeGen/SystemZ/fp-const-11.ll b/llvm/test/CodeGen/SystemZ/fp-const-11.ll index 8523f2786c3..ff8289d4fad 100644 --- a/llvm/test/CodeGen/SystemZ/fp-const-11.ll +++ b/llvm/test/CodeGen/SystemZ/fp-const-11.ll @@ -38,3 +38,33 @@ define void @f3(fp128 *%x) {    store fp128 0xL00000000000000003fff000002000000, fp128 *%x    ret void  } + +; Test that VGBM works. +define void @f4(fp128 *%x) { +; CHECK-LABEL: f4: +; CHECK:      vgbm %v0, 21845 +; CHECK-NEXT: vst %v0, 0(%r2) +; CHECK-NEXT: br %r14 +  store fp128 0xL00ff00ff00ff00ff00ff00ff00ff00ff, fp128 *%x +  ret void +} + +; Test that VREPI works. +define void @f5(fp128 *%x) { +; CHECK-LABEL: f5: +; CHECK:      vrepib  %v0, -8 +; CHECK-NEXT: vst %v0, 0(%r2) +; CHECK-NEXT: br %r14 +  store fp128 0xLf8f8f8f8f8f8f8f8f8f8f8f8f8f8f8f8, fp128 *%x +  ret void +} + +; Test that VGM works. 
+define void @f6(fp128 *%x) { +; CHECK-LABEL: f6: +; CHECK:      vgmg %v0, 12, 31 +; CHECK-NEXT: vst %v0, 0(%r2) +; CHECK-NEXT: br %r14 +  store fp128 0xL000fffff00000000000fffff00000000, fp128 *%x +  ret void +} diff --git a/llvm/test/CodeGen/SystemZ/vec-const-05.ll b/llvm/test/CodeGen/SystemZ/vec-const-05.ll index 719280e9d60..55f3cdd5902 100644 --- a/llvm/test/CodeGen/SystemZ/vec-const-05.ll +++ b/llvm/test/CodeGen/SystemZ/vec-const-05.ll @@ -1,28 +1,63 @@ -; Test vector byte masks, v4f32 version. Only all-zero vectors are handled. +; Test vector byte masks, v4f32 version.  ;  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s  ; Test an all-zeros vector. -define <4 x float> @f0() { -; CHECK-LABEL: f0: +define <4 x float> @f1() { +; CHECK-LABEL: f1:  ; CHECK: vgbm %v24, 0  ; CHECK: br %r14    ret <4 x float> zeroinitializer  } -; Test that undefs are treated as zero. -define <4 x float> @f1() { -; CHECK-LABEL: f1: -; CHECK: vgbm %v24, 0 +; Test an all-ones vector. +define <4 x float> @f2() { +; CHECK-LABEL: f2: +; CHECK: vgbm %v24, 65535  ; CHECK: br %r14 -  ret <4 x float> <float zeroinitializer, float undef, -                   float zeroinitializer, float undef> +  ret <4 x float> <float 0xffffffffe0000000, float 0xffffffffe0000000, +                   float 0xffffffffe0000000, float 0xffffffffe0000000> +} + +; Test a mixed vector (mask 0xc731). +define <4 x float> @f3() { +; CHECK-LABEL: f3: +; CHECK: vgbm %v24, 50993 +; CHECK: br %r14 +  ret <4 x float> <float 0xffffe00000000000, float 0x381fffffe0000000, +                   float 0x379fffe000000000, float 0x371fe00000000000> +} + +; Test that undefs are treated as zero (mask 0xc031). +define <4 x float> @f4() { +; CHECK-LABEL: f4: +; CHECK: vgbm %v24, 49201 +; CHECK: br %r14 +  ret <4 x float> <float 0xffffe00000000000, float undef, +                   float 0x379fffe000000000, float 0x371fe00000000000> +} + +; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. 
+define <4 x float> @f5() { +; CHECK-LABEL: f5: +; CHECK-NOT: vgbm +; CHECK: br %r14 +  ret <4 x float> <float 0xffffe00000000000, float 0x381fffffc0000000, +                   float 0x379fffe000000000, float 0x371fe00000000000>  }  ; Test an all-zeros v2f32 that gets promoted to v4f32. -define <2 x float> @f2() { -; CHECK-LABEL: f2: +define <2 x float> @f6() { +; CHECK-LABEL: f6:  ; CHECK: vgbm %v24, 0  ; CHECK: br %r14    ret <2 x float> zeroinitializer  } + +; Test a mixed v2f32 that gets promoted to v4f32 (mask 0xc700). +define <2 x float> @f7() { +; CHECK-LABEL: f7: +; CHECK: vgbm %v24, 50944 +; CHECK: br %r14 +  ret <2 x float> <float 0xffffe00000000000, float 0x381fffffe0000000> +} diff --git a/llvm/test/CodeGen/SystemZ/vec-const-06.ll b/llvm/test/CodeGen/SystemZ/vec-const-06.ll index 6144e0f4910..be53a0581ec 100644 --- a/llvm/test/CodeGen/SystemZ/vec-const-06.ll +++ b/llvm/test/CodeGen/SystemZ/vec-const-06.ll @@ -1,19 +1,43 @@ -; Test vector byte masks, v2f64 version. Only all-zero vectors are handled. +; Test vector byte masks, v2f64 version.  ;  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s  ; Test an all-zeros vector. -define <2 x double> @f0() { -; CHECK-LABEL: f0: +define <2 x double> @f1() { +; CHECK-LABEL: f1:  ; CHECK: vgbm %v24, 0  ; CHECK: br %r14    ret <2 x double> zeroinitializer  } -; Test that undefs are treated as zero. -define <2 x double> @f1() { -; CHECK-LABEL: f1: -; CHECK: vgbm %v24, 0 +; Test an all-ones vector. +define <2 x double> @f2() { +; CHECK-LABEL: f2: +; CHECK: vgbm %v24, 65535 +; CHECK: br %r14 +  ret <2 x double> <double 0xffffffffffffffff, double 0xffffffffffffffff> +} + +; Test a mixed vector (mask 0x8c76). +define <2 x double> @f3() { +; CHECK-LABEL: f3: +; CHECK: vgbm %v24, 35958 +; CHECK: br %r14 +  ret <2 x double> <double 0xff000000ffff0000, double 0x00ffffff00ffff00> +} + +; Test that undefs are treated as zero (mask 0x8c00). 
+define <2 x double> @f4() { +; CHECK-LABEL: f4: +; CHECK: vgbm %v24, 35840 +; CHECK: br %r14 +  ret <2 x double> <double 0xff000000ffff0000, double undef> +} + +; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. +define <2 x double> @f5() { +; CHECK-LABEL: f5: +; CHECK-NOT: vgbm  ; CHECK: br %r14 -  ret <2 x double> <double zeroinitializer, double undef> +  ret <2 x double> <double 0xfe000000ffff0000, double 0x00ffffff00ffff00>  } diff --git a/llvm/test/CodeGen/SystemZ/vec-const-19.ll b/llvm/test/CodeGen/SystemZ/vec-const-19.ll new file mode 100644 index 00000000000..e48bfe9250d --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/vec-const-19.ll @@ -0,0 +1,18 @@ +; Test that a scalar FP constant can be reused from a vector splat constant +; of the same value. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +define void @fun()  { +; CHECK-LABEL: fun: +; CHECK: vgmg %v0, 2, 10 +; CHECK-NOT: vgmg %v0, 2, 10 + +  %tmp = fadd <2 x double> zeroinitializer, <double 1.000000e+00, double 1.000000e+00> +  %tmp1 = fmul <2 x double> %tmp, <double 5.000000e-01, double 5.000000e-01> +  store <2 x double> %tmp1, <2 x double>* undef +  %tmp2 = load double, double* undef +  %tmp3 = fmul double %tmp2, 5.000000e-01 +  store double %tmp3, double* undef +  ret void +}  | 

