diff options
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 113 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.h | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/neon-copy.ll | 270 | 
3 files changed, 29 insertions, 360 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index a794fcdc1f3..27277c47f39 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4154,70 +4154,21 @@ AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {    return false;  } -// Check whether a shuffle_vector could be presented as concat_vector. -bool AArch64TargetLowering::isConcatVector(SDValue Op,SelectionDAG &DAG, -                                           SDValue V0, SDValue V1, -                                           const int* Mask, -                                           SDValue &Res) const { -  SDLoc DL(Op); -  EVT VT = Op.getValueType(); -  unsigned NumElts = VT.getVectorNumElements(); -  unsigned V0NumElts = V0.getValueType().getVectorNumElements(); -  bool isContactVector = true; -  bool splitV0 = false; -  int offset = 0; -  for (int I = 0, E = NumElts; I != E; I++){ -    if (Mask[I] != I + offset) { -      if(I && !splitV0 && Mask[I] == I + (int)V0NumElts / 2) { -        splitV0 = true; -        offset = V0NumElts / 2; -      } else { -        isContactVector = false; -        break; -      } -    } -  } -  if (isContactVector) { -    EVT CastVT = EVT::getVectorVT(*DAG.getContext(), -                                  VT.getVectorElementType(), NumElts / 2); -    if(CastVT.getSizeInBits() < 64) -      return false; - -    if (splitV0) { -      assert(V0NumElts >= NumElts / 2 && -             "invalid operand for extract_subvector!"); -      V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0, -                       DAG.getConstant(0, MVT::i64)); -    } -    if (NumElts != V1.getValueType().getVectorNumElements() * 2) { -      assert(V1.getValueType().getVectorNumElements() >= NumElts / 2 && -             "invalid operand for extract_subvector!"); -      V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1, -                       DAG.getConstant(0, MVT::i64)); -    } -    Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1); -    return true; -  } -  return false; -} - -// Check whether a Build Vector could be presented as Shuffle Vector. -// This Shuffle Vector maybe not legalized, so the length of its operand and -// the length of result may not equal. +// Check whether a Build Vector could be presented as Shuffle Vector. If yes, +// try to call LowerVECTOR_SHUFFLE to lower it.  bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, -                                                 SDValue &V0, SDValue &V1, -                                                 int *Mask) const { +                                                 SDValue &Res) const {    SDLoc DL(Op);    EVT VT = Op.getValueType();    unsigned NumElts = VT.getVectorNumElements();    unsigned V0NumElts = 0; +  int Mask[16]; +  SDValue V0, V1;    // Check if all elements are extracted from less than 3 vectors.    for (unsigned i = 0; i < NumElts; ++i) {      SDValue Elt = Op.getOperand(i); -    if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || -        Elt.getOperand(0).getValueType().getVectorElementType() != -            VT.getVectorElementType()) +    if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)        return false;      if (V0.getNode() == 0) { @@ -4238,7 +4189,25 @@ bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,        return false;      }    } -  return true; + +  if (!V1.getNode() && V0NumElts == NumElts * 2) { +    V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, +                     DAG.getConstant(NumElts, MVT::i64)); +    V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, +                     DAG.getConstant(0, MVT::i64)); +    V0NumElts = V0.getValueType().getVectorNumElements(); +  } + +  if (V1.getNode() && NumElts == V0NumElts && +      V0NumElts == V1.getValueType().getVectorNumElements()) { +    SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask); +    if(Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) +      Res = Shuffle; +    else +      Res = LowerVECTOR_SHUFFLE(Shuffle, DAG); +    return true; +  } else +    return false;  }  // If this is a case we can't handle, return null and let the default @@ -4444,31 +4413,9 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,      return SDValue();    // Try to lower this in lowering ShuffleVector way. -  SDValue V0, V1; -  int Mask[16]; -  if (isKnownShuffleVector(Op, DAG, V0, V1, Mask)) { -    unsigned V0NumElts = V0.getValueType().getVectorNumElements(); -    if (!V1.getNode() && V0NumElts == NumElts * 2) { -      V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, -                       DAG.getConstant(NumElts, MVT::i64)); -      V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, -                       DAG.getConstant(0, MVT::i64)); -      V0NumElts = V0.getValueType().getVectorNumElements(); -    } - -    if (V1.getNode() && NumElts == V0NumElts && -        V0NumElts == V1.getValueType().getVectorNumElements()) { -      SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask); -      if(Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) -        return Shuffle; -      else -        return LowerVECTOR_SHUFFLE(Shuffle, DAG); -    } else { -      SDValue Res; -      if(isConcatVector(Op, DAG, V0, V1, Mask, Res)) -        return Res; -    } -  } +  SDValue Shuf; +  if (isKnownShuffleVector(Op, DAG, Shuf)) +    return Shuf;    // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we    // know the default expansion would otherwise fall back on something even @@ -4654,10 +4601,6 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,        return DAG.getNode(ISDNo, dl, VT, V1, V2);    } -  SDValue Res; -  if (isConcatVector(Op, DAG, V1, V2, &ShuffleMask[0], Res)) -    return Res; -    // If the element of shuffle mask are all the same constant, we can    // transform it into either NEON_VDUP or NEON_VDUPLANE    if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 8961d9c1415..3879663e570 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -232,11 +232,7 @@ public:                            SDLoc dl, SelectionDAG &DAG,                            SmallVectorImpl<SDValue> &InVals) const; -  bool isConcatVector(SDValue Op,SelectionDAG &DAG, SDValue V0, SDValue V1, -                      const int* Mask, SDValue &Res) const; - -  bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &V0, -                            SDValue &V1, int *Mask) const; +  bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &Res) const;    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,                              const AArch64Subtarget *ST) const; diff --git a/llvm/test/CodeGen/AArch64/neon-copy.ll b/llvm/test/CodeGen/AArch64/neon-copy.ll index bda56564449..0799eb3b2d6 100644 --- a/llvm/test/CodeGen/AArch64/neon-copy.ll +++ b/llvm/test/CodeGen/AArch64/neon-copy.ll @@ -975,14 +975,6 @@ entry:  declare float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float>) -define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { -; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: -  %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> -  ret <16 x i8> %vecinit30 -} -  define <2 x i32> @test_concat_undef_v1i32(<1 x i32> %a) {  ; CHECK-LABEL: test_concat_undef_v1i32:  ; CHECK: ins v{{[0-9]+}}.s[1], v{{[0-9]+}}.s[0] @@ -1029,268 +1021,6 @@ entry:    ret <2 x i32> %h  } -define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { -; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: -  %vecext = extractelement <8 x i8> %x, i32 0 -  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 -  %vecext1 = extractelement <8 x i8> %x, i32 1 -  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 -  %vecext3 = extractelement <8 x i8> %x, i32 2 -  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 -  %vecext5 = extractelement <8 x i8> %x, i32 3 -  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 -  %vecext7 = extractelement <8 x i8> %x, i32 4 -  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 -  %vecext9 = extractelement <8 x i8> %x, i32 5 -  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 -  %vecext11 = extractelement <8 x i8> %x, i32 6 -  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 -  %vecext13 = extractelement <8 x i8> %x, i32 7 -  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 -  %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> -  ret <16 x i8> %vecinit30 -} - -define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 { -; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: -  %vecext = extractelement <16 x i8> %x, i32 0 -  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 -  %vecext1 = extractelement <16 x i8> %x, i32 1 -  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 -  %vecext3 = extractelement <16 x i8> %x, i32 2 -  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 -  %vecext5 = extractelement <16 x i8> %x, i32 3 -  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 -  %vecext7 = extractelement <16 x i8> %x, i32 4 -  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 -  %vecext9 = extractelement <16 x i8> %x, i32 5 -  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 -  %vecext11 = extractelement <16 x i8> %x, i32 6 -  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 -  %vecext13 = extractelement <16 x i8> %x, i32 7 -  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 -  %vecext15 = extractelement <8 x i8> %y, i32 0 -  %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 -  %vecext17 = extractelement <8 x i8> %y, i32 1 -  %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 -  %vecext19 = extractelement <8 x i8> %y, i32 2 -  %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 -  %vecext21 = extractelement <8 x i8> %y, i32 3 -  %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 -  %vecext23 = extractelement <8 x i8> %y, i32 4 -  %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 -  %vecext25 = extractelement <8 x i8> %y, i32 5 -  %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 -  %vecext27 = extractelement <8 x i8> %y, i32 6 -  %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 -  %vecext29 = extractelement <8 x i8> %y, i32 7 -  %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 -  ret <16 x i8> %vecinit30 -} - -define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { -; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: -  %vecext = extractelement <8 x i8> %x, i32 0 -  %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 -  %vecext1 = extractelement <8 x i8> %x, i32 1 -  %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 -  %vecext3 = extractelement <8 x i8> %x, i32 2 -  %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 -  %vecext5 = extractelement <8 x i8> %x, i32 3 -  %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 -  %vecext7 = extractelement <8 x i8> %x, i32 4 -  %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 -  %vecext9 = extractelement <8 x i8> %x, i32 5 -  %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 -  %vecext11 = extractelement <8 x i8> %x, i32 6 -  %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 -  %vecext13 = extractelement <8 x i8> %x, i32 7 -  %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 -  %vecext15 = extractelement <8 x i8> %y, i32 0 -  %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 -  %vecext17 = extractelement <8 x i8> %y, i32 1 -  %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 -  %vecext19 = extractelement <8 x i8> %y, i32 2 -  %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 -  %vecext21 = extractelement <8 x i8> %y, i32 3 -  %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 -  %vecext23 = extractelement <8 x i8> %y, i32 4 -  %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 -  %vecext25 = extractelement <8 x i8> %y, i32 5 -  %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 -  %vecext27 = extractelement <8 x i8> %y, i32 6 -  %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 -  %vecext29 = extractelement <8 x i8> %y, i32 7 -  %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 -  ret <16 x i8> %vecinit30 -} - -define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 { -; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: -  %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> -  ret <8 x i16> %vecinit14 -} - -define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { -; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: - %vecext = extractelement <4 x i16> %x, i32 0 -  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 -  %vecext1 = extractelement <4 x i16> %x, i32 1 -  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 -  %vecext3 = extractelement <4 x i16> %x, i32 2 -  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 -  %vecext5 = extractelement <4 x i16> %x, i32 3 -  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 -  %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> -  ret <8 x i16> %vecinit14 -} - -define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 { -; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: -  %vecext = extractelement <8 x i16> %x, i32 0 -  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 -  %vecext1 = extractelement <8 x i16> %x, i32 1 -  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 -  %vecext3 = extractelement <8 x i16> %x, i32 2 -  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 -  %vecext5 = extractelement <8 x i16> %x, i32 3 -  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 -  %vecext7 = extractelement <4 x i16> %y, i32 0 -  %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 -  %vecext9 = extractelement <4 x i16> %y, i32 1 -  %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 -  %vecext11 = extractelement <4 x i16> %y, i32 2 -  %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 -  %vecext13 = extractelement <4 x i16> %y, i32 3 -  %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 -  ret <8 x i16> %vecinit14 -} - -define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 { -; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: -  %vecext = extractelement <4 x i16> %x, i32 0 -  %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 -  %vecext1 = extractelement <4 x i16> %x, i32 1 -  %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 -  %vecext3 = extractelement <4 x i16> %x, i32 2 -  %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 -  %vecext5 = extractelement <4 x i16> %x, i32 3 -  %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 -  %vecext7 = extractelement <4 x i16> %y, i32 0 -  %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 -  %vecext9 = extractelement <4 x i16> %y, i32 1 -  %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 -  %vecext11 = extractelement <4 x i16> %y, i32 2 -  %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 -  %vecext13 = extractelement <4 x i16> %y, i32 3 -  %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 -  ret <8 x i16> %vecinit14 -} - -define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { -; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: -  %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> -  ret <4 x i32> %vecinit6 -} - -define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { -; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: -  %vecext = extractelement <2 x i32> %x, i32 0 -  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 -  %vecext1 = extractelement <2 x i32> %x, i32 1 -  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 -  %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> -  ret <4 x i32> %vecinit6 -} - -define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 { -; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: -  %vecext = extractelement <4 x i32> %x, i32 0 -  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 -  %vecext1 = extractelement <4 x i32> %x, i32 1 -  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 -  %vecext3 = extractelement <2 x i32> %y, i32 0 -  %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 -  %vecext5 = extractelement <2 x i32> %y, i32 1 -  %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3 -  ret <4 x i32> %vecinit6 -} - -define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 { -; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: -  %vecext = extractelement <2 x i32> %x, i32 0 -  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 -  %vecext1 = extractelement <2 x i32> %x, i32 1 -  %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 -  %vecext3 = extractelement <2 x i32> %y, i32 0 -  %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 -  %vecext5 = extractelement <2 x i32> %y, i32 1 -  %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3 -  ret <4 x i32> %vecinit6 -} - -define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 { -; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: -  %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2> -  ret <2 x i64> %vecinit2 -} - -define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 { -; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: -  %vecext = extractelement <1 x i64> %x, i32 0 -  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 -  %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2> -  ret <2 x i64> %vecinit2 -} - -define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 { -; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: -  %vecext = extractelement <2 x i64> %x, i32 0 -  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 -  %vecext1 = extractelement <1 x i64> %y, i32 0 -  %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 -  ret <2 x i64> %vecinit2 -} - -define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 { -; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -entry: -  %vecext = extractelement <1 x i64> %x, i32 0 -  %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 -  %vecext1 = extractelement <1 x i64> %y, i32 0 -  %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 -  ret <2 x i64> %vecinit2 -} -  declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>)  define <1 x i16> @test_copy_FPR16_FPR16(<1 x i16> %a, <1 x i16> %b) {  | 

